// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef TRANSLIT_H
#define TRANSLIT_H

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

/**
 * \file
 * \brief C++ API: Transforms text from one format to another.
 */

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/parseerr.h"
#include "unicode/utrans.h" // UTransPosition, UTransDirection
#include "unicode/strenum.h"

U_NAMESPACE_BEGIN

class UnicodeFilter;
class UnicodeSet;
class TransliteratorParser;
class NormalizationTransliterator;
class TransliteratorIDParser;

/**
 *
 * <code>Transliterator</code> is an abstract class that
 * transliterates text from one format to another.  The most common
 * kind of transliterator is a script, or alphabet, transliterator.
 * For example, a Russian to Latin transliterator changes Russian text
 * written in Cyrillic characters to phonetically equivalent Latin
 * characters.  It does not <em>translate</em> Russian to English!
 * Transliteration, unlike translation, operates on characters, without
 * reference to the meanings of words and sentences.
 *
 * <p>Although script conversion is its most common use, a
 * transliterator can actually perform a more general class of tasks.
 * In fact, <code>Transliterator</code> defines a very general API
 * which specifies only that a segment of the input text is replaced
 * by new text.  The particulars of this conversion are determined
 * entirely by subclasses of <code>Transliterator</code>.
 *
 * <p><b>Transliterators are stateless</b>
 *
 * <p><code>Transliterator</code> objects are <em>stateless</em>; they
 * retain no information between calls to
 * <code>transliterate()</code>.  (However, this does <em>not</em>
 * mean that threads may share transliterators without synchronizing
 * them.  Transliterators are not immutable, so they must be
 * synchronized when shared between threads.)  This might seem to
 * limit the complexity of the transliteration operation.  In
 * practice, subclasses perform complex transliterations by delaying
 * the replacement of text until it is known that no other
 * replacements are possible.  In other words, although the
 * <code>Transliterator</code> objects are stateless, the source text
 * itself embodies all the needed information, and delayed operation
 * allows arbitrary complexity.
 *
 * <p><b>Batch transliteration</b>
 *
 * <p>The simplest way to perform transliteration is all at once, on a
 * string of existing text.  This is referred to as <em>batch</em>
 * transliteration.  For example, given a string <code>input</code>
 * and a transliterator <code>t</code>, the call
 *
 *     String result = t.transliterate(input);
 *
 * will transliterate it and return the result.  Other methods allow
 * the client to specify a substring to be transliterated and to use
 * {@link Replaceable } objects instead of strings, in order to
 * preserve out-of-band information (such as text styles).
 *
 * <p><b>Keyboard transliteration</b>
 *
 * <p>Somewhat more involved is <em>keyboard</em>, or incremental
 * transliteration.  This is the transliteration of text that is
 * arriving from some source (typically the user's keyboard) one
 * character at a time, or in some other piecemeal fashion.
 *
 * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
 * stores the text.  As text is inserted, as much as possible is
 * transliterated on the fly.  This means a GUI that displays the
 * contents of the buffer may show text being modified as each new
 * character arrives.
 *
 * <p>Consider the simple rule-based Transliterator:
 * <pre>
 *     th>{theta}
 *     t>{tau}
 * </pre>
 *
 * When the user types 't', nothing will happen, since the
 * transliterator is waiting to see if the next character is 'h'.  To
 * remedy this, we introduce the notion of a cursor, marked by a '|'
 * in the output string:
 * <pre>
 *     t>|{tau}
 *     {tau}h>{theta}
 * </pre>
 *
 * Now when the user types 't', tau appears, and if the next character
 * is 'h', the tau changes to a theta.  This is accomplished by
 * maintaining a cursor position (independent of the insertion point,
 * and invisible in the GUI) across calls to
 * <code>transliterate()</code>.  Typically, the cursor will
 * be coincident with the insertion point, but in a case like the one
 * above, it will precede the insertion point.
 *
 * <p>Keyboard transliteration methods maintain a set of three indices
 * that are updated with each call to
 * <code>transliterate()</code>, including the cursor, start,
 * and limit.  Since these indices are changed by the method, they are
 * passed in an <code>int[]</code> array. The <code>START</code> index
 * marks the beginning of the substring that the transliterator will
 * look at.  It is advanced as text becomes committed (but it is not
 * the committed index; that's the <code>CURSOR</code>).  The
 * <code>CURSOR</code> index, described above, marks the point at
 * which the transliterator last stopped, either because it reached
 * the end, or because it required more characters to disambiguate
 * between possible inputs.  The <code>CURSOR</code> can also be
 * explicitly set by rules in a rule-based Transliterator.
 * Any characters before the <code>CURSOR</code> index are frozen;
 * future keyboard transliteration calls within this input sequence
 * will not change them.  New text is inserted at the
 * <code>LIMIT</code> index, which marks the end of the substring that
 * the transliterator looks at.
 *
 * <p>Because keyboard transliteration assumes that more characters
 * are to arrive, it is conservative in its operation.  It only
 * transliterates when it can do so unambiguously.  Otherwise it waits
 * for more characters to arrive.  When the client code knows that no
 * more characters are forthcoming, perhaps because the user has
 * performed some input termination operation, then it should call
 * <code>finishTransliteration()</code> to complete any
 * pending transliterations.
 *
 * <p><b>Inverses</b>
 *
 * <p>Pairs of transliterators may be inverses of one another.  For
 * example, if transliterator <b>A</b> transliterates characters by
 * incrementing their Unicode value (so "abc" -> "def"), and
 * transliterator <b>B</b> decrements character values, then <b>A</b>
 * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
 * with <b>B</b> in a compound transliterator, the result is the
 * identity transliterator, that is, a transliterator that does not
 * change its input text.
 *
 * The <code>Transliterator</code> method <code>getInverse()</code>
 * returns a transliterator's inverse, if one exists, or
 * <code>null</code> otherwise.  However, the result of
 * <code>getInverse()</code> usually will <em>not</em> be a true
 * mathematical inverse.  This is because true inverse transliterators
 * are difficult to formulate.  For example, consider two
 * transliterators: <b>AB</b>, which transliterates the character 'A'
 * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
 * seem that these are exact inverses, since
 *
 * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br>
 * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly
 *
 * where 'x' represents transliteration.  However,
 *
 * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br>
 * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly
 *
 * so <b>AB</b> composed with <b>BA</b> is not the
 * identity. Nonetheless, <b>BA</b> may be usefully considered to be
 * <b>AB</b>'s inverse, and it is on this basis that
 * <b>AB</b><code>.getInverse()</code> could legitimately return
 * <b>BA</b>.
 *
 * <p><b>IDs and display names</b>
 *
 * <p>A transliterator is designated by a short identifier string or
 * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
 * where <em>source</em> describes the entity being replaced, and
 * <em>destination</em> describes the entity replacing
 * <em>source</em>.  The entities may be the names of scripts,
 * particular sequences of characters, or whatever else it is that the
 * transliterator converts to or from.  For example, a transliterator
 * from Russian to Latin might be named "Russian-Latin".  A
 * transliterator from keyboard escape sequences to Latin-1 characters
 * might be named "KeyboardEscape-Latin1".  By convention, system
 * entity names are in English, with the initial letters of words
 * capitalized; user entity names may follow any format so long as
 * they do not contain dashes.
 *
 * <p>In addition to programmatic IDs, transliterator objects have
 * display names for presentation in user interfaces, returned by
 * {@link #getDisplayName }.
 *
 * <p><b>Factory methods and registration</b>
 *
 * <p>In general, client code should use the factory method
 * {@link #createInstance } to obtain an instance of a
 * transliterator given its ID.  Valid IDs may be enumerated using
 * <code>getAvailableIDs()</code>.  Since transliterators are mutable,
 * multiple calls to {@link #createInstance } with the same ID will
 * return distinct objects.
 *
 * <p>In addition to the system transliterators registered at startup,
 * user transliterators may be registered by calling
 * <code>registerInstance()</code> at run time.  A registered instance
 * acts a template; future calls to {@link #createInstance } with the ID
 * of the registered object return clones of that object.  Thus any
 * object passed to <tt>registerInstance()</tt> must implement
 * <tt>clone()</tt> properly.  To register a transliterator subclass
 * without instantiating it (until it is needed), users may call
 * {@link #registerFactory }.  In this case, the objects are
 * instantiated by invoking the zero-argument public constructor of
 * the class.
 *
 * <p><b>Subclassing</b>
 *
 * Subclasses must implement the abstract method
 * <code>handleTransliterate()</code>.  <p>Subclasses should override
 * the <code>transliterate()</code> method taking a
 * <code>Replaceable</code> and the <code>transliterate()</code>
 * method taking a <code>String</code> and <code>StringBuffer</code>
 * if the performance of these methods can be improved over the
 * performance obtained by the default implementations in this class.
 *
 * <p><b>Rule syntax</b>
 *
 * <p>A set of rules determines how to perform translations.
 * Rules within a rule set are separated by semicolons (';').
 * To include a literal semicolon, prefix it with a backslash ('\').
 * Unicode Pattern_White_Space is ignored.
 * If the first non-blank character on a line is '#',
 * the entire line is ignored as a comment.
 *
 * <p>Each set of rules consists of two groups, one forward, and one
 * reverse. This is a convention that is not enforced; rules for one
 * direction may be omitted, with the result that translations in
 * that direction will not modify the source text. In addition,
 * bidirectional forward-reverse rules may be specified for
 * symmetrical transformations.
 *
 * <p>Note: Another description of the Transliterator rule syntax is available in
 * <a href="https://www.unicode.org/reports/tr35/tr35-general.html#Transform_Rules_Syntax">section
 * Transform Rules Syntax of UTS #35: Unicode LDML</a>.
 * The rules are shown there using arrow symbols ← and → and ↔.
 * ICU supports both those and the equivalent ASCII symbols &lt; and &gt; and &lt;&gt;.
 *
 * <p>Rule statements take one of the following forms:
 *
 * <dl>
 *     <dt><code>$alefmadda=\\u0622;</code></dt>
 *     <dd><strong>Variable definition.</strong> The name on the
 *         left is assigned the text on the right. In this example,
 *         after this statement, instances of the left hand name,
 *         &quot;<code>$alefmadda</code>&quot;, will be replaced by
 *         the Unicode character U+0622. Variable names must begin
 *         with a letter and consist only of letters, digits, and
 *         underscores. Case is significant. Duplicate names cause
 *         an exception to be thrown, that is, variables cannot be
 *         redefined. The right hand side may contain well-formed
 *         text of any length, including no text at all (&quot;<code>$empty=;</code>&quot;).
 *         The right hand side may contain embedded <code>UnicodeSet</code>
 *         patterns, for example, &quot;<code>$softvowel=[eiyEIY]</code>&quot;.</dd>
 *     <dt><code>ai&gt;$alefmadda;</code></dt>
 *     <dd><strong>Forward translation rule.</strong> This rule
 *         states that the string on the left will be changed to the
 *         string on the right when performing forward
 *         transliteration.</dd>
 *     <dt><code>ai&lt;$alefmadda;</code></dt>
 *     <dd><strong>Reverse translation rule.</strong> This rule
 *         states that the string on the right will be changed to
 *         the string on the left when performing reverse
 *         transliteration.</dd>
 * </dl>
 *
 * <dl>
 *     <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
 *     <dd><strong>Bidirectional translation rule.</strong> This
 *         rule states that the string on the right will be changed
 *         to the string on the left when performing forward
 *         transliteration, and vice versa when performing reverse
 *         transliteration.</dd>
 * </dl>
 *
 * <p>Translation rules consist of a <em>match pattern</em> and an <em>output
 * string</em>. The match pattern consists of literal characters,
 * optionally preceded by context, and optionally followed by
 * context. Context characters, like literal pattern characters,
 * must be matched in the text being transliterated. However, unlike
 * literal pattern characters, they are not replaced by the output
 * text. For example, the pattern &quot;<code>abc{def}</code>&quot;
 * indicates the characters &quot;<code>def</code>&quot; must be
 * preceded by &quot;<code>abc</code>&quot; for a successful match.
 * If there is a successful match, &quot;<code>def</code>&quot; will
 * be replaced, but not &quot;<code>abc</code>&quot;. The final '<code>}</code>'
 * is optional, so &quot;<code>abc{def</code>&quot; is equivalent to
 * &quot;<code>abc{def}</code>&quot;. Another example is &quot;<code>{123}456</code>&quot;
 * (or &quot;<code>123}456</code>&quot;) in which the literal
 * pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
 *
 * <p>The output string of a forward or reverse rule consists of
 * characters to replace the literal pattern characters. If the
 * output string contains the character '<code>|</code>', this is
 * taken to indicate the location of the <em>cursor</em> after
 * replacement. The cursor is the point in the text at which the
 * next replacement, if any, will be applied. The cursor is usually
 * placed within the replacement text; however, it can actually be
 * placed into the preceding or following context by using the
 * special character '@'. Examples:
 *
 * <pre>
 *     a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor before a
 *     {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between y and z
 * </pre>
 *
 * <p><b>UnicodeSet</b>
 *
 * <p><code>UnicodeSet</code> patterns may appear anywhere that
 * makes sense. They may appear in variable definitions.
 * Contrariwise, <code>UnicodeSet</code> patterns may themselves
 * contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
 * or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.
 *
 * <p><code>UnicodeSet</code> patterns may also be embedded directly
 * into rule strings. Thus, the following two rules are equivalent:
 *
 * <pre>
 *     $vowel=[aeiou]; $vowel&gt;'*'; # One way to do this
 *     [aeiou]&gt;'*'; # Another way
 * </pre>
 *
 * <p>See {@link UnicodeSet} for more documentation and examples.
 *
 * <p><b>Segments</b>
 *
 * <p>Segments of the input string can be matched and copied to the
 * output string. This makes certain sets of rules simpler and more
 * general, and makes reordering possible. For example:
 *
 * <pre>
 *     ([a-z]) &gt; $1 $1; # double lowercase letters
 *     ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs
 * </pre>
 *
 * <p>The segment of the input string to be copied is delimited by
 * &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
 * nine segments may be defined. Segments may not overlap. In the
 * output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
 * represent the input string segments, in left-to-right order of
 * definition.
 *
 * <p><b>Anchors</b>
 *
 * <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
 * special characters '<code>^</code>' and '<code>$</code>'. For example:
 *
 * <pre>
 *   ^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text
 *   &nbsp; a&nbsp;&nbsp; &gt; 'A'; # match other instances of 'a'
 *   &nbsp; z $ &gt; 'END_Z'; &nbsp;&nbsp;# match 'z' at end of text
 *   &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances of 'z'
 * </pre>
 *
 * <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
 * This is done by including a virtual anchor character '<code>$</code>' at the end of the
 * set pattern. Although this is usually the match character for the end anchor, the set will
 * match either the beginning or the end of the text, depending on its placement. For
 * example:
 *
 * <pre>
 *   $x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor
 *   $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start
 *   &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end
 * </pre>
 *
 * <p><b>Example</b>
 *
 * <p>The following example rules illustrate many of the features of
 * the rule language.
 *
 * <table border="0" cellpadding="4">
 *     <tr>
 *         <td style="vertical-align: top;">Rule 1.</td>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>abc{def}&gt;x|y</code></td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top;">Rule 2.</td>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>xyz&gt;r</code></td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top;">Rule 3.</td>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>yz&gt;q</code></td>
 *     </tr>
 * </table>
 *
 * <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
 * yields the following results:
 *
 * <table border="0" cellpadding="4">
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>|adefabcdefz</code></td>
 *         <td style="vertical-align: top;">Initial state, no rules match. Advance
 *         cursor.</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>a|defabcdefz</code></td>
 *         <td style="vertical-align: top;">Still no match. Rule 1 does not match
 *         because the preceding context is not present.</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>ad|efabcdefz</code></td>
 *         <td style="vertical-align: top;">Still no match. Keep advancing until
 *         there is a match...</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>ade|fabcdefz</code></td>
 *         <td style="vertical-align: top;">...</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>adef|abcdefz</code></td>
 *         <td style="vertical-align: top;">...</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>adefa|bcdefz</code></td>
 *         <td style="vertical-align: top;">...</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>adefab|cdefz</code></td>
 *         <td style="vertical-align: top;">...</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>adefabc|defz</code></td>
 *         <td style="vertical-align: top;">Rule 1 matches; replace &quot;<code>def</code>&quot;
 *         with &quot;<code>xy</code>&quot; and back up the cursor
 *         to before the '<code>y</code>'.</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>adefabcx|yz</code></td>
 *         <td style="vertical-align: top;">Although &quot;<code>xyz</code>&quot; is
 *         present, rule 2 does not match because the cursor is
 *         before the '<code>y</code>', not before the '<code>x</code>'.
 *         Rule 3 does match. Replace &quot;<code>yz</code>&quot;
 *         with &quot;<code>q</code>&quot;.</td>
 *     </tr>
 *     <tr>
 *         <td style="vertical-align: top; write-space: nowrap;"><code>adefabcxq|</code></td>
 *         <td style="vertical-align: top;">The cursor is at the end;
 *         transliteration is complete.</td>
 *     </tr>
 * </table>
 *
 * <p>The order of rules is significant. If multiple rules may match
 * at some point, the first matching rule is applied.
 *
 * <p>Forward and reverse rules may have an empty output string.
 * Otherwise, an empty left or right hand side of any statement is a
 * syntax error.
 *
 * <p>Single quotes are used to quote any character other than a
 * digit or letter. To specify a single quote itself, inside or
 * outside of quotes, use two single quotes in a row. For example,
 * the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
 * string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
 *
 * <p><b>Notes</b>
 *
 * <p>While a Transliterator is being built from rules, it checks that
 * the rules are added in proper order. For example, if the rule
 * &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
 * then the second rule will throw an exception. The reason is that
 * the second rule can never be triggered, since the first rule
 * always matches anything it matches. In other words, the first
 * rule <em>masks</em> the second rule.
 *
 * @author Alan Liu
 * @stable ICU 2.0
 */
class U_I18N_API Transliterator : public UObject {

private:

    /**
     * Programmatic name, e.g., "Latin-Arabic".
     */
    UnicodeString ID;

    /**
     * This transliterator's filter.  Any character for which
     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
     * altered by this transliterator.  If <tt>filter</tt> is
     * <tt>null</tt> then no filtering is applied.
     */
    UnicodeFilter* filter;

    int32_t maximumContextLength;

 public:

    /**
     * A context integer or pointer for a factory function, passed by
     * value.
     * @stable ICU 2.4
     */
    union Token {
        /**
         * This token, interpreted as a 32-bit integer.
         * @stable ICU 2.4
         */
        int32_t integer;
        /**
         * This token, interpreted as a native pointer.
         * @stable ICU 2.4
         */
        void*   pointer;
    };

#ifndef U_HIDE_INTERNAL_API
    /**
     * Return a token containing an integer.
     * @return a token containing an integer.
     * @internal
     */
    inline static Token integerToken(int32_t);

    /**
     * Return a token containing a pointer.
     * @return a token containing a pointer.
     * @internal
     */
    inline static Token pointerToken(void*);
#endif  /* U_HIDE_INTERNAL_API */

    /**
     * A function that creates and returns a Transliterator.  When
     * invoked, it will be passed the ID string that is being
     * instantiated, together with the context pointer that was passed
     * in when the factory function was first registered.  Many
     * factory functions will ignore both parameters, however,
     * functions that are registered to more than one ID may use the
     * ID or the context parameter to parameterize the transliterator
     * they create.
     * @param ID      the string identifier for this transliterator
     * @param context a context pointer that will be stored and
     *                later passed to the factory function when an ID matching
     *                the registration ID is being instantiated with this factory.
     * @stable ICU 2.4
     */
    typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);

protected:

    /**
     * Default constructor.
     * @param ID the string identifier for this transliterator
     * @param adoptedFilter the filter.  Any character for which
     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
     * altered by this transliterator.  If <tt>filter</tt> is
     * <tt>null</tt> then no filtering is applied.
     * @stable ICU 2.4
     */
    Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);

    /**
     * Copy constructor.
     * @stable ICU 2.4
     */
    Transliterator(const Transliterator&);

    /**
     * Assignment operator.
     * @stable ICU 2.4
     */
    Transliterator& operator=(const Transliterator&);

    /**
     * Create a transliterator from a basic ID.  This is an ID
     * containing only the forward direction source, target, and
     * variant.
     * @param id a basic ID of the form S-T or S-T/V.
     * @param canon canonical ID to assign to the object, or
     * NULL to leave the ID unchanged
     * @return a newly created Transliterator or null if the ID is
     * invalid.
     * @stable ICU 2.4
     */
    static Transliterator* createBasicInstance(const UnicodeString& id,
                                               const UnicodeString* canon);

    friend class TransliteratorParser; // for parseID()
    friend class TransliteratorIDParser; // for createBasicInstance()
    friend class TransliteratorAlias; // for setID()

public:

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~Transliterator();

    /**
     * Implements Cloneable.
     * All subclasses are encouraged to implement this method if it is
     * possible and reasonable to do so.  Subclasses that are to be
     * registered with the system using <tt>registerInstance()</tt>
     * are required to implement this method.  If a subclass does not
     * implement clone() properly and is registered with the system
     * using registerInstance(), then the default clone() implementation
     * will return null, and calls to createInstance() will fail.
     *
     * @return a copy of the object.
     * @see #registerInstance
     * @stable ICU 2.0
     */
    virtual Transliterator* clone() const;

    /**
     * Transliterates a segment of a string, with optional filtering.
     *
     * @param text the string to be transliterated
     * @param start the beginning index, inclusive; <code>0 <= start
     * <= limit</code>.
     * @param limit the ending index, exclusive; <code>start <= limit
     * <= text.length()</code>.
     * @return The new limit index.  The text previously occupying <code>[start,
     * limit)</code> has been transliterated, possibly to a string of a different
     * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
     * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
     * the returned value is -1 and the input string remains unchanged.
     * @stable ICU 2.0
     */
    virtual int32_t transliterate(Replaceable& text,
                                  int32_t start, int32_t limit) const;

    /**
     * Transliterates an entire string in place. Convenience method.
     * @param text the string to be transliterated
     * @stable ICU 2.0
     */
    virtual void transliterate(Replaceable& text) const;

    /**
     * Transliterates the portion of the text buffer that can be
     * transliterated unambiguosly after new text has been inserted,
     * typically as a result of a keyboard event.  The new text in
     * <code>insertion</code> will be inserted into <code>text</code>
     * at <code>index.limit</code>, advancing
     * <code>index.limit</code> by <code>insertion.length()</code>.
     * Then the transliterator will try to transliterate characters of
     * <code>text</code> between <code>index.cursor</code> and
     * <code>index.limit</code>.  Characters before
     * <code>index.cursor</code> will not be changed.
     *
     * <p>Upon return, values in <code>index</code> will be updated.
     * <code>index.start</code> will be advanced to the first
     * character that future calls to this method will read.
     * <code>index.cursor</code> and <code>index.limit</code> will
     * be adjusted to delimit the range of text that future calls to
     * this method may change.
     *
     * <p>Typical usage of this method begins with an initial call
     * with <code>index.start</code> and <code>index.limit</code>
     * set to indicate the portion of <code>text</code> to be
     * transliterated, and <code>index.cursor == index.start</code>.
     * Thereafter, <code>index</code> can be used without
     * modification in future calls, provided that all changes to
     * <code>text</code> are made via this method.
     *
     * <p>This method assumes that future calls may be made that will
     * insert new text into the buffer.  As a result, it only performs
     * unambiguous transliterations.  After the last call to this
     * method, there may be untransliterated text that is waiting for
     * more input to resolve an ambiguity.  In order to perform these
     * pending transliterations, clients should call
     * {@link #finishTransliteration } after the last call to this
     * method has been made.
     *
     * @param text the buffer holding transliterated and untransliterated text
     * @param index an array of three integers.
     *
     * <ul><li><code>index.start</code>: the beginning index,
     * inclusive; <code>0 <= index.start <= index.limit</code>.
     *
     * <li><code>index.limit</code>: the ending index, exclusive;
     * <code>index.start <= index.limit <= text.length()</code>.
     * <code>insertion</code> is inserted at
     * <code>index.limit</code>.
     *
     * <li><code>index.cursor</code>: the next character to be
     * considered for transliteration; <code>index.start <=
     * index.cursor <= index.limit</code>.  Characters before
     * <code>index.cursor</code> will not be changed by future calls
     * to this method.</ul>
     *
     * @param insertion text to be inserted and possibly
     * transliterated into the translation buffer at
     * <code>index.limit</code>.  If <code>null</code> then no text
     * is inserted.
     * @param status    Output param to filled in with a success or an error.
     * @see #handleTransliterate
     * @exception IllegalArgumentException if <code>index</code>
     * is invalid
     * @see UTransPosition
     * @stable ICU 2.0
     */
    virtual void transliterate(Replaceable& text, UTransPosition& index,
                               const UnicodeString& insertion,
                               UErrorCode& status) const;

    /**
     * Transliterates the portion of the text buffer that can be
     * transliterated unambiguosly after a new character has been
     * inserted, typically as a result of a keyboard event.  This is a
     * convenience method.
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index an array of three integers.
     * @param insertion text to be inserted and possibly
     * transliterated into the translation buffer at
     * <code>index.limit</code>.
     * @param status    Output param to filled in with a success or an error.
     * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
     * @stable ICU 2.0
     */
    virtual void transliterate(Replaceable& text, UTransPosition& index,
                               UChar32 insertion,
                               UErrorCode& status) const;

    /**
     * Transliterates the portion of the text buffer that can be
     * transliterated unambiguosly.  This is a convenience method; see
     * {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
     * for details.
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index an array of three integers.
     * @param status    Output param to filled in with a success or an error.
     * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode &) const
     * @stable ICU 2.0
     */
    virtual void transliterate(Replaceable& text, UTransPosition& index,
                               UErrorCode& status) const;

    /**
     * Finishes any pending transliterations that were waiting for
     * more characters.  Clients should call this method as the last
     * call after a sequence of one or more calls to
     * <code>transliterate()</code>.
     * @param text the buffer holding transliterated and
     * untransliterated text.
     * @param index the array of indices previously passed to {@link #transliterate }
     * @stable ICU 2.0
     */
    virtual void finishTransliteration(Replaceable& text,
                                       UTransPosition& index) const;

private:

    /**
     * This internal method does incremental transliteration.  If the
     * 'insertion' is non-null then we append it to 'text' before
     * proceeding.  This method calls through to the pure virtual
     * framework method handleTransliterate() to do the actual
     * work.
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index an array of three integers.  See {@link
     * #transliterate(Replaceable, int[], String)}.
     * @param insertion text to be inserted and possibly
     * transliterated into the translation buffer at
     * <code>index.limit</code>.
     * @param status    Output param to filled in with a success or an error.
     */
    void _transliterate(Replaceable& text,
                        UTransPosition& index,
                        const UnicodeString* insertion,
                        UErrorCode &status) const;

protected:

    /**
     * Abstract method that concrete subclasses define to implement
     * their transliteration algorithm.  This method handles both
     * incremental and non-incremental transliteration.  Let
     * <code>originalStart</code> refer to the value of
     * <code>pos.start</code> upon entry.
     *
     * <ul>
     *  <li>If <code>incremental</code> is false, then this method
     *  should transliterate all characters between
     *  <code>pos.start</code> and <code>pos.limit</code>. Upon return
     *  <code>pos.start</code> must == <code> pos.limit</code>.</li>
     *
     *  <li>If <code>incremental</code> is true, then this method
     *  should transliterate all characters between
     *  <code>pos.start</code> and <code>pos.limit</code> that can be
     *  unambiguously transliterated, regardless of future insertions
     *  of text at <code>pos.limit</code>.  Upon return,
     *  <code>pos.start</code> should be in the range
     *  [<code>originalStart</code>, <code>pos.limit</code>).
     *  <code>pos.start</code> should be positioned such that
     *  characters [<code>originalStart</code>, <code>
     *  pos.start</code>) will not be changed in the future by this
     *  transliterator and characters [<code>pos.start</code>,
     *  <code>pos.limit</code>) are unchanged.</li>
     * </ul>
     *
     * <p>Implementations of this method should also obey the
     * following invariants:</p>
     *
     * <ul>
     *  <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
     *  should be updated to reflect changes in length of the text
     *  between <code>pos.start</code> and <code>pos.limit</code>. The
     *  difference <code> pos.contextLimit - pos.limit</code> should
     *  not change.</li>
     *
     *  <li><code>pos.contextStart</code> should not change.</li>
     *
     *  <li>Upon return, neither <code>pos.start</code> nor
     *  <code>pos.limit</code> should be less than
     *  <code>originalStart</code>.</li>
     *
     *  <li>Text before <code>originalStart</code> and text after
     *  <code>pos.limit</code> should not change.</li>
     *
     *  <li>Text before <code>pos.contextStart</code> and text after
     *  <code> pos.contextLimit</code> should be ignored.</li>
     * </ul>
     *
     * <p>Subclasses may safely assume that all characters in
     * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
     * In other words, the filter has already been applied by the time
     * this method is called.  See
     * <code>filteredTransliterate()</code>.
     *
     * <p>This method is <b>not</b> for public consumption.  Calling
     * this method directly will transliterate
     * [<code>pos.start</code>, <code>pos.limit</code>) without
     * applying the filter. End user code should call <code>
     * transliterate()</code> instead of this method. Subclass code
     * and wrapping transliterators should call
     * <code>filteredTransliterate()</code> instead of this method.<p>
     *
     * @param text the buffer holding transliterated and
     * untransliterated text
     *
     * @param pos the indices indicating the start, limit, context
     * start, and context limit of the text.
     *
     * @param incremental if true, assume more text may be inserted at
     * <code>pos.limit</code> and act accordingly.  Otherwise,
     * transliterate all text between <code>pos.start</code> and
     * <code>pos.limit</code> and move <code>pos.start</code> up to
     * <code>pos.limit</code>.
     *
     * @see #transliterate
     * @stable ICU 2.4
     */
    virtual void handleTransliterate(Replaceable& text,
                                     UTransPosition& pos,
                                     UBool incremental) const = 0;

public:
    /**
     * Transliterate a substring of text, as specified by index, taking filters
     * into account.  This method is for subclasses that need to delegate to
     * another transliterator.
     * @param text the text to be transliterated
     * @param index the position indices
     * @param incremental if true, then assume more characters may be inserted
     * at index.limit, and postpone processing to accommodate future incoming
     * characters
     * @stable ICU 2.4
     */
    virtual void filteredTransliterate(Replaceable& text,
                                       UTransPosition& index,
                                       UBool incremental) const;

private:

    /**
     * Top-level transliteration method, handling filtering, incremental and
     * non-incremental transliteration, and rollback.  All transliteration
     * public API methods eventually call this method with a rollback argument
     * of true.  Other entities may call this method but rollback should be
     * false.
     *
     * <p>If this transliterator has a filter, break up the input text into runs
     * of unfiltered characters.  Pass each run to
     * subclass.handleTransliterate().
     *
     * <p>In incremental mode, if rollback is true, perform a special
     * incremental procedure in which several passes are made over the input
     * text, adding one character at a time, and committing successful
     * transliterations as they occur.  Unsuccessful transliterations are rolled
     * back and retried with additional characters to give correct results.
     *
     * @param text the text to be transliterated
     * @param index the position indices
     * @param incremental if true, then assume more characters may be inserted
     * at index.limit, and postpone processing to accommodate future incoming
     * characters
     * @param rollback if true and if incremental is true, then perform special
     * incremental processing, as described above, and undo partial
     * transliterations where necessary.  If incremental is false then this
     * parameter is ignored.
     */
    virtual void filteredTransliterate(Replaceable& text,
                                       UTransPosition& index,
                                       UBool incremental,
                                       UBool rollback) const;

public:

    /**
     * Returns the length of the longest context required by this transliterator.
     * This is <em>preceding</em> context.  The default implementation supplied
     * by <code>Transliterator</code> returns zero; subclasses
     * that use preceding context should override this method to return the
     * correct value.  For example, if a transliterator translates "ddd" (where
     * d is any digit) to "555" when preceded by "(ddd)", then the preceding
     * context length is 5, the length of "(ddd)".
     *
     * @return The maximum number of preceding context characters this
     * transliterator needs to examine
     * @stable ICU 2.0
     */
    int32_t getMaximumContextLength(void) const;

protected:

    /**
     * Method for subclasses to use to set the maximum context length.
     * @param maxContextLength the new value to be set.
     * @see #getMaximumContextLength
     * @stable ICU 2.4
     */
    void setMaximumContextLength(int32_t maxContextLength);

public:

    /**
     * Returns a programmatic identifier for this transliterator.
     * If this identifier is passed to <code>createInstance()</code>, it
     * will return this object, if it has been registered.
     * @return a programmatic identifier for this transliterator.
     * @see #registerInstance
     * @see #registerFactory
     * @see #getAvailableIDs
     * @stable ICU 2.0
     */
    virtual const UnicodeString& getID(void) const;

    /**
     * Returns a name for this transliterator that is appropriate for
     * display to the user in the default locale.  See {@link #getDisplayName }
     * for details.
     * @param ID     the string identifier for this transliterator
     * @param result Output param to receive the display name
     * @return       A reference to 'result'.
     * @stable ICU 2.0
     */
    static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
                                         UnicodeString& result);

    /**
     * Returns a name for this transliterator that is appropriate for
     * display to the user in the given locale.  This name is taken
     * from the locale resource data in the standard manner of the
     * <code>java.text</code> package.
     *
     * <p>If no localized names exist in the system resource bundles,
     * a name is synthesized using a localized
     * <code>MessageFormat</code> pattern from the resource data.  The
     * arguments to this pattern are an integer followed by one or two
     * strings.  The integer is the number of strings, either 1 or 2.
     * The strings are formed by splitting the ID for this
     * transliterator at the first '-'.  If there is no '-', then the
     * entire ID forms the only string.
     * @param ID       the string identifier for this transliterator
     * @param inLocale the Locale in which the display name should be
     *                 localized.
     * @param result   Output param to receive the display name
     * @return         A reference to 'result'.
     * @stable ICU 2.0
     */
    static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
                                         const Locale& inLocale,
                                         UnicodeString& result);

    /**
     * Returns the filter used by this transliterator, or <tt>NULL</tt>
     * if this transliterator uses no filter.
     * @return the filter used by this transliterator, or <tt>NULL</tt>
     *         if this transliterator uses no filter.
     * @stable ICU 2.0
     */
    const UnicodeFilter* getFilter(void) const;

    /**
     * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
     * transliterator uses no filter.  The caller must eventually delete the
     * result.  After this call, this transliterator's filter is set to
     * <tt>NULL</tt>.
     * @return the filter used by this transliterator, or <tt>NULL</tt> if this
     *         transliterator uses no filter.
     * @stable ICU 2.4
     */
    UnicodeFilter* orphanFilter(void);

    /**
     * Changes the filter used by this transliterator.  If the filter
     * is set to <tt>null</tt> then no filtering will occur.
     *
     * <p>Callers must take care if a transliterator is in use by
     * multiple threads.  The filter should not be changed by one
     * thread while another thread may be transliterating.
     * @param adoptedFilter the new filter to be adopted.
     * @stable ICU 2.0
     */
    void adoptFilter(UnicodeFilter* adoptedFilter);

    /**
     * Returns this transliterator's inverse.  See the class
     * documentation for details.  This implementation simply inverts
     * the two entities in the ID and attempts to retrieve the
     * resulting transliterator.  That is, if <code>getID()</code>
     * returns "A-B", then this method will return the result of
     * <code>createInstance("B-A")</code>, or <code>null</code> if that
     * call fails.
     *
     * <p>Subclasses with knowledge of their inverse may wish to
     * override this method.
     *
     * @param status Output param to filled in with a success or an error.
     * @return a transliterator that is an inverse, not necessarily
     * exact, of this transliterator, or <code>null</code> if no such
     * transliterator is registered.
     * @see #registerInstance
     * @stable ICU 2.0
     */
    Transliterator* createInverse(UErrorCode& status) const;

    /**
     * Returns a <code>Transliterator</code> object given its ID.
     * The ID must be either a system transliterator ID or a ID registered
     * using <code>registerInstance()</code>.
     *
     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
     * @param dir        either FORWARD or REVERSE.
     * @param parseError Struct to receive information on position
     *                   of error if an error is encountered
     * @param status     Output param to filled in with a success or an error.
     * @return A <code>Transliterator</code> object with the given ID
     * @see #registerInstance
     * @see #getAvailableIDs
     * @see #getID
     * @stable ICU 2.0
     */
    static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
                                          UTransDirection dir,
                                          UParseError& parseError,
                                          UErrorCode& status);

    /**
     * Returns a <code>Transliterator</code> object given its ID.
     * The ID must be either a system transliterator ID or a ID registered
     * using <code>registerInstance()</code>.
     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
     * @param dir        either FORWARD or REVERSE.
     * @param status     Output param to filled in with a success or an error.
     * @return A <code>Transliterator</code> object with the given ID
     * @stable ICU 2.0
     */
    static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
                                          UTransDirection dir,
                                          UErrorCode& status);

    /**
     * Returns a <code>Transliterator</code> object constructed from
     * the given rule string.  This will be a rule-based Transliterator,
     * if the rule string contains only rules, or a
     * compound Transliterator, if it contains ID blocks, or a
     * null Transliterator, if it contains ID blocks which parse as
     * empty for the given direction.
     *
     * @param ID            the id for the transliterator.
     * @param rules         rules, separated by ';'
     * @param dir           either FORWARD or REVERSE.
     * @param parseError    Struct to receive information on position
     *                      of error if an error is encountered
     * @param status        Output param set to success/failure code.
     * @return a newly created Transliterator
     * @stable ICU 2.0
     */
    static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
                                           const UnicodeString& rules,
                                           UTransDirection dir,
                                           UParseError& parseError,
                                           UErrorCode& status);

    /**
     * Create a rule string that can be passed to createFromRules()
     * to recreate this transliterator.
     * @param result the string to receive the rules.  Previous
     * contents will be deleted.
     * @param escapeUnprintable if true then convert unprintable
     * character to their hex escape representations, \\uxxxx or
     * \\Uxxxxxxxx.  Unprintable characters are those other than
     * U+000A, U+0020..U+007E.
     * @stable ICU 2.0
     */
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

    /**
     * Return the number of elements that make up this transliterator.
     * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
     * were created, the return value of this method would be 3.
     *
     * <p>If this transliterator is not composed of other
     * transliterators, then this method returns 1.
     * @return the number of transliterators that compose this
     * transliterator, or 1 if this transliterator is not composed of
     * multiple transliterators
     * @stable ICU 3.0
     */
    int32_t countElements() const;

    /**
     * Return an element that makes up this transliterator.  For
     * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
     * were created, the return value of this method would be one
     * of the three transliterator objects that make up that
     * transliterator: [NFD, Jamo-Latin, Latin-Greek].
     *
     * <p>If this transliterator is not composed of other
     * transliterators, then this method will return a reference to
     * this transliterator when given the index 0.
     * @param index a value from 0..countElements()-1 indicating the
     * transliterator to return
     * @param ec input-output error code
     * @return one of the transliterators that makes up this
     * transliterator, if this transliterator is made up of multiple
     * transliterators, otherwise a reference to this object if given
     * an index of 0
     * @stable ICU 3.0
     */
    const Transliterator& getElement(int32_t index, UErrorCode& ec) const;

    /**
     * Returns the set of all characters that may be modified in the
     * input text by this Transliterator.  This incorporates this
     * object's current filter; if the filter is changed, the return
     * value of this function will change.  The default implementation
     * returns an empty set.  Some subclasses may override
     * {@link #handleGetSourceSet } to return a more precise result. The
     * return result is approximate in any case and is intended for
     * use by tests, tools, or utilities.
     * @param result receives result set; previous contents lost
     * @return a reference to result
     * @see #getTargetSet
     * @see #handleGetSourceSet
     * @stable ICU 2.4
     */
    UnicodeSet& getSourceSet(UnicodeSet& result) const;

    /**
     * Framework method that returns the set of all characters that
     * may be modified in the input text by this Transliterator,
     * ignoring the effect of this object's filter.  The base class
     * implementation returns the empty set.  Subclasses that wish to
     * implement this should override this method.
     * @return the set of characters that this transliterator may
     * modify.  The set may be modified, so subclasses should return a
     * newly-created object.
     * @param result receives result set; previous contents lost
     * @see #getSourceSet
     * @see #getTargetSet
     * @stable ICU 2.4
     */
    virtual void handleGetSourceSet(UnicodeSet& result) const;

    /**
     * Returns the set of all characters that may be generated as
     * replacement text by this transliterator.  The default
     * implementation returns the empty set.  Some subclasses may
     * override this method to return a more precise result.  The
     * return result is approximate in any case and is intended for
     * use by tests, tools, or utilities requiring such
     * meta-information.
     * @param result receives result set; previous contents lost
     * @return a reference to result
     * @see #getTargetSet
     * @stable ICU 2.4
     */
    virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;

public:

    /**
     * Registers a factory function that creates transliterators of
     * a given ID.
     *
     * Because ICU may choose to cache Transliterators internally, this must
     * be called at application startup, prior to any calls to
     * Transliterator::createXXX to avoid undefined behavior.
     *
     * @param id the ID being registered
     * @param factory a function pointer that will be copied and
     * called later when the given ID is passed to createInstance()
     * @param context a context pointer that will be stored and
     * later passed to the factory function when an ID matching
     * the registration ID is being instantiated with this factory.
     * @stable ICU 2.0
     */
    static void U_EXPORT2 registerFactory(const UnicodeString& id,
                                Factory factory,
                                Token context);

    /**
     * Registers an instance <tt>obj</tt> of a subclass of
     * <code>Transliterator</code> with the system.  When
     * <tt>createInstance()</tt> is called with an ID string that is
     * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
     * returned.
     *
     * After this call the Transliterator class owns the adoptedObj
     * and will delete it.
     *
     * Because ICU may choose to cache Transliterators internally, this must
     * be called at application startup, prior to any calls to
     * Transliterator::createXXX to avoid undefined behavior.
     *
     * @param adoptedObj an instance of subclass of
     * <code>Transliterator</code> that defines <tt>clone()</tt>
     * @see #createInstance
     * @see #registerFactory
     * @see #unregister
     * @stable ICU 2.0
     */
    static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);

    /**
     * Registers an ID string as an alias of another ID string.
     * That is, after calling this function, <tt>createInstance(aliasID)</tt>
     * will return the same thing as <tt>createInstance(realID)</tt>.
     * This is generally used to create shorter, more mnemonic aliases
     * for long compound IDs.
     *
     * @param aliasID The new ID being registered.
     * @param realID The ID that the new ID is to be an alias for.
     * This can be a compound ID and can include filters and should
     * refer to transliterators that have already been registered with
     * the framework, although this isn't checked.
     * @stable ICU 3.6
     */
     static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
                                         const UnicodeString& realID);

protected:

#ifndef U_HIDE_INTERNAL_API
    /**
     * @param id the ID being registered
     * @param factory a function pointer that will be copied and
     * called later when the given ID is passed to createInstance()
     * @param context a context pointer that will be stored and
     * later passed to the factory function when an ID matching
     * the registration ID is being instantiated with this factory.
     * @internal
     */
    static void _registerFactory(const UnicodeString& id,
                                 Factory factory,
                                 Token context);

    /**
     * @internal
     */
    static void _registerInstance(Transliterator* adoptedObj);

    /**
     * @internal
     */
    static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);

    /**
     * Register two targets as being inverses of one another.  For
     * example, calling registerSpecialInverse("NFC", "NFD", true) causes
     * Transliterator to form the following inverse relationships:
     *
     * <pre>NFC => NFD
     * Any-NFC => Any-NFD
     * NFD => NFC
     * Any-NFD => Any-NFC</pre>
     *
     * (Without the special inverse registration, the inverse of NFC
     * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
     * that the presence or absence of "Any-" is preserved.
     *
     * <p>The relationship is symmetrical; registering (a, b) is
     * equivalent to registering (b, a).
     *
     * <p>The relevant IDs must still be registered separately as
     * factories or classes.
     *
     * <p>Only the targets are specified.  Special inverses always
     * have the form Any-Target1 <=> Any-Target2.  The target should
     * have canonical casing (the casing desired to be produced when
     * an inverse is formed) and should contain no whitespace or other
     * extraneous characters.
     *
     * @param target the target against which to register the inverse
     * @param inverseTarget the inverse of target, that is
     * Any-target.getInverse() => Any-inverseTarget
     * @param bidirectional if true, register the reverse relation
     * as well, that is, Any-inverseTarget.getInverse() => Any-target
     * @internal
     */
    static void _registerSpecialInverse(const UnicodeString& target,
                                        const UnicodeString& inverseTarget,
                                        UBool bidirectional);
#endif  /* U_HIDE_INTERNAL_API */

public:

    /**
     * Unregisters a transliterator or class.  This may be either
     * a system transliterator or a user transliterator or class.
     * Any attempt to construct an unregistered transliterator based
     * on its ID will fail.
     *
     * Because ICU may choose to cache Transliterators internally, this should
     * be called during application shutdown, after all calls to
     * Transliterator::createXXX to avoid undefined behavior.
     *
     * @param ID the ID of the transliterator or class
     * @return the <code>Object</code> that was registered with
     * <code>ID</code>, or <code>null</code> if none was
     * @see #registerInstance
     * @see #registerFactory
     * @stable ICU 2.0
     */
    static void U_EXPORT2 unregister(const UnicodeString& ID);

public:

    /**
     * Return a StringEnumeration over the IDs available at the time of the
     * call, including user-registered IDs.
     * @param ec input-output error code
     * @return a newly-created StringEnumeration over the transliterators
     * available at the time of the call. The caller should delete this object
     * when done using it.
     * @stable ICU 3.0
     */
    static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);

    /**
     * Return the number of registered source specifiers.
     * @return the number of registered source specifiers.
     * @stable ICU 2.0
     */
    static int32_t U_EXPORT2 countAvailableSources(void);

    /**
     * Return a registered source specifier.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableSources()
     * @param result fill-in parameter to receive the source specifier.
     * If index is out of range, result will be empty.
     * @return reference to result
     * @stable ICU 2.0
     */
    static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
                                             UnicodeString& result);

    /**
     * Return the number of registered target specifiers for a given
     * source specifier.
     * @param source the given source specifier.
     * @return the number of registered target specifiers for a given
     *         source specifier.
     * @stable ICU 2.0
     */
    static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);

    /**
     * Return a registered target specifier for a given source.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableTargets(source)
     * @param source the source specifier
     * @param result fill-in parameter to receive the target specifier.
     * If source is invalid or if index is out of range, result will
     * be empty.
     * @return reference to result
     * @stable ICU 2.0
     */
    static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
                                             const UnicodeString& source,
                                             UnicodeString& result);

    /**
     * Return the number of registered variant specifiers for a given
     * source-target pair.
     * @param source    the source specifiers.
     * @param target    the target specifiers.
     * @stable ICU 2.0
     */
    static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
                                          const UnicodeString& target);

    /**
     * Return a registered variant specifier for a given source-target
     * pair.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableVariants(source, target)
     * @param source the source specifier
     * @param target the target specifier
     * @param result fill-in parameter to receive the variant
     * specifier.  If source is invalid or if target is invalid or if
     * index is out of range, result will be empty.
     * @return reference to result
     * @stable ICU 2.0
     */
    static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
                                              const UnicodeString& source,
                                              const UnicodeString& target,
                                              UnicodeString& result);

protected:

#ifndef U_HIDE_INTERNAL_API
    /**
     * Non-mutexed internal method
     * @internal
     */
    static int32_t _countAvailableSources(void);

    /**
     * Non-mutexed internal method
     * @internal
     */
    static UnicodeString& _getAvailableSource(int32_t index,
                                              UnicodeString& result);

    /**
     * Non-mutexed internal method
     * @internal
     */
    static int32_t _countAvailableTargets(const UnicodeString& source);

    /**
     * Non-mutexed internal method
     * @internal
     */
    static UnicodeString& _getAvailableTarget(int32_t index,
                                              const UnicodeString& source,
                                              UnicodeString& result);

    /**
     * Non-mutexed internal method
     * @internal
     */
    static int32_t _countAvailableVariants(const UnicodeString& source,
                                           const UnicodeString& target);

    /**
     * Non-mutexed internal method
     * @internal
     */
    static UnicodeString& _getAvailableVariant(int32_t index,
                                               const UnicodeString& source,
                                               const UnicodeString& target,
                                               UnicodeString& result);
#endif  /* U_HIDE_INTERNAL_API */

protected:

    /**
     * Set the ID of this transliterators.  Subclasses shouldn't do
     * this, unless the underlying script behavior has changed.
     * @param id the new id t to be set.
     * @stable ICU 2.4
     */
    void setID(const UnicodeString& id);

public:

    /**
     * Return the class ID for this class.  This is useful only for
     * comparing to a return value from getDynamicClassID().
     * Note that Transliterator is an abstract base class, and therefor
     * no fully constructed object will  have a dynamic
     * UCLassID that equals the UClassID returned from
     * TRansliterator::getStaticClassID().
     * @return       The class ID for class Transliterator.
     * @stable ICU 2.0
     */
    static UClassID U_EXPORT2 getStaticClassID(void);

    /**
     * Returns a unique class ID <b>polymorphically</b>.  This method
     * is to implement a simple version of RTTI, since not all C++
     * compilers support genuine RTTI.  Polymorphic operator==() and
     * clone() methods call this method.
     *
     * <p>Concrete subclasses of Transliterator must use the
     *    UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from
     *    uobject.h to provide the RTTI functions.
     *
     * @return The class ID for this object. All objects of a given
     * class have the same class ID.  Objects of other classes have
     * different class IDs.
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID(void) const override = 0;

private:
    static UBool initializeRegistry(UErrorCode &status);

public:
#ifndef U_HIDE_OBSOLETE_API
    /**
     * Return the number of IDs currently registered with the system.
     * To retrieve the actual IDs, call getAvailableID(i) with
     * i from 0 to countAvailableIDs() - 1.
     * @return the number of IDs currently registered with the system.
     * @obsolete ICU 3.4 use getAvailableIDs() instead
     */
    static int32_t U_EXPORT2 countAvailableIDs(void);

    /**
     * Return the index-th available ID.  index must be between 0
     * and countAvailableIDs() - 1, inclusive.  If index is out of
     * range, the result of getAvailableID(0) is returned.
     * @param index the given ID index.
     * @return      the index-th available ID.  index must be between 0
     *              and countAvailableIDs() - 1, inclusive.  If index is out of
     *              range, the result of getAvailableID(0) is returned.
     * @obsolete ICU 3.4 use getAvailableIDs() instead; this function
     * is not thread safe, since it returns a reference to storage that
     * may become invalid if another thread calls unregister
     */
    static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
#endif  /* U_HIDE_OBSOLETE_API */
};

inline int32_t Transliterator::getMaximumContextLength(void) const {
    return maximumContextLength;
}

inline void Transliterator::setID(const UnicodeString& id) {
    ID = id;
    // NUL-terminate the ID string, which is a non-aliased copy.
    ID.append((char16_t)0);
    ID.truncate(ID.length()-1);
}

#ifndef U_HIDE_INTERNAL_API
inline Transliterator::Token Transliterator::integerToken(int32_t i) {
    Token t;
    t.integer = i;
    return t;
}

inline Transliterator::Token Transliterator::pointerToken(void* p) {
    Token t;
    t.pointer = p;
    return t;
}
#endif  /* U_HIDE_INTERNAL_API */

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif /* U_SHOW_CPLUSPLUS_API */

#endif
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           // © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1999-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidi.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999jul27
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*/

#ifndef UBIDI_H
#define UBIDI_H

#include "unicode/utypes.h"
#include "unicode/uchar.h"

#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
#endif   // U_SHOW_CPLUSPLUS_API

/**
 *\file
 * \brief C API: Bidi algorithm
 *
 * <h2>Bidi algorithm for ICU</h2>
 *
 * This is an implementation of the Unicode Bidirectional Algorithm.
 * The algorithm is defined in the
 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p>
 *
 * Note: Libraries that perform a bidirectional algorithm and
 * reorder strings accordingly are sometimes called "Storage Layout Engines".
 * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
 * "Storage Layout Engines".
 *
 * <h3>General remarks about the API:</h3>
 *
 * In functions with an error code parameter,
 * the <code>pErrorCode</code> pointer must be valid
 * and the value that it points to must not indicate a failure before
 * the function call. Otherwise, the function returns immediately.
 * After the function call, the value indicates success or failure.<p>
 *
 * The &quot;limit&quot; of a sequence of characters is the position just after their
 * last character, i.e., one more than that position.<p>
 *
 * Some of the API functions provide access to &quot;runs&quot;.
 * Such a &quot;run&quot; is defined as a sequence of characters
 * that are at the same embedding level
 * after performing the Bidi algorithm.<p>
 *
 * @author Markus W. Scherer
 * @version 1.0
 *
 *
 * <h4> Sample code for the ICU Bidi API </h4>
 *
 * <h5>Rendering a paragraph with the ICU Bidi API</h5>
 *
 * This is (hypothetical) sample code that illustrates
 * how the ICU Bidi API could be used to render a paragraph of text.
 * Rendering code depends highly on the graphics system,
 * therefore this sample code must make a lot of assumptions,
 * which may or may not match any existing graphics system's properties.
 *
 * <p>The basic assumptions are:</p>
 * <ul>
 * <li>Rendering is done from left to right on a horizontal line.</li>
 * <li>A run of single-style, unidirectional text can be rendered at once.</li>
 * <li>Such a run of text is passed to the graphics system with
 *     characters (code units) in logical order.</li>
 * <li>The line-breaking algorithm is very complicated
 *     and Locale-dependent -
 *     and therefore its implementation omitted from this sample code.</li>
 * </ul>
 *
 * <pre>
 * \code
 *#include <unicode/ubidi.h>
 *
 *typedef enum {
 *     styleNormal=0, styleSelected=1,
 *     styleBold=2, styleItalics=4,
 *     styleSuper=8, styleSub=16
 *} Style;
 *
 *typedef struct { int32_t limit; Style style; } StyleRun;
 *
 *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
 *                  const StyleRun *styleRuns, int styleRunCount);
 *
 * // set *pLimit and *pStyleRunLimit for a line
 * // from text[start] and from styleRuns[styleRunStart]
 * // using ubidi_getLogicalRun(para, ...)
 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
 *                  UBiDi *para,
 *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
 *                  int *pLineWidth);
 *
 * // render runs on a line sequentially, always from left to right
 *
 * // prepare rendering a new line
 * void startLine(UBiDiDirection textDirection, int lineWidth);
 *
 * // render a run of text and advance to the right by the run width
 * // the text[start..limit-1] is always in logical order
 * void renderRun(const UChar *text, int32_t start, int32_t limit,
 *               UBiDiDirection textDirection, Style style);
 *
 * // We could compute a cross-product
 * // from the style runs with the directional runs
 * // and then reorder it.
 * // Instead, here we iterate over each run type
 * // and render the intersections -
 * // with shortcuts in simple (and common) cases.
 * // renderParagraph() is the main function.
 *
 * // render a directional run with
 * // (possibly) multiple style runs intersecting with it
 * void renderDirectionalRun(const UChar *text,
 *                           int32_t start, int32_t limit,
 *                           UBiDiDirection direction,
 *                           const StyleRun *styleRuns, int styleRunCount) {
 *     int i;
 *
 *     // iterate over style runs
 *     if(direction==UBIDI_LTR) {
 *         int styleLimit;
 *
 *         for(i=0; i<styleRunCount; ++i) {
 *             styleLimit=styleRuns[i].limit;
 *             if(start<styleLimit) {
 *                 if(styleLimit>limit) { styleLimit=limit; }
 *                 renderRun(text, start, styleLimit,
 *                           direction, styleRuns[i].style);
 *                 if(styleLimit==limit) { break; }
 *                 start=styleLimit;
 *             }
 *         }
 *     } else {
 *         int styleStart;
 *
 *         for(i=styleRunCount-1; i>=0; --i) {
 *             if(i>0) {
 *                 styleStart=styleRuns[i-1].limit;
 *             } else {
 *                 styleStart=0;
 *             }
 *             if(limit>=styleStart) {
 *                 if(styleStart<start) { styleStart=start; }
 *                 renderRun(text, styleStart, limit,
 *                           direction, styleRuns[i].style);
 *                 if(styleStart==start) { break; }
 *                 limit=styleStart;
 *             }
 *         }
 *     }
 * }
 *
 * // the line object represents text[start..limit-1]
 * void renderLine(UBiDi *line, const UChar *text,
 *                 int32_t start, int32_t limit,
 *                 const StyleRun *styleRuns, int styleRunCount,
 *                 UErrorCode *pErrorCode) {
 *     UBiDiDirection direction=ubidi_getDirection(line);
 *     if(direction!=UBIDI_MIXED) {
 *         // unidirectional
 *         if(styleRunCount<=1) {
 *             renderRun(text, start, limit, direction, styleRuns[0].style);
 *         } else {
 *             renderDirectionalRun(text, start, limit,
 *                                  direction, styleRuns, styleRunCount);
 *         }
 *     } else {
 *         // mixed-directional
 *         int32_t count, i, length;
 *         UBiDiLevel level;
 *
 *         count=ubidi_countRuns(line, pErrorCode);
 *         if(U_SUCCESS(*pErrorCode)) {
 *             if(styleRunCount<=1) {
 *                 Style style=styleRuns[0].style;
 *
 *                 // iterate over directional runs
 *                for(i=0; i<count; ++i) {
 *                    direction=ubidi_getVisualRun(line, i, &start, &length);
 *                     renderRun(text, start, start+length, direction, style);
 *                }
 *             } else {
 *                 int32_t j;
 *
 *                 // iterate over both directional and style runs
 *                 for(i=0; i<count; ++i) {
 *                     direction=ubidi_getVisualRun(line, i, &start, &length);
 *                     renderDirectionalRun(text, start, start+length,
 *                                          direction, styleRuns, styleRunCount);
 *                 }
 *             }
 *         }
 *     }
 * }
 *
 *void renderParagraph(const UChar *text, int32_t length,
 *                     UBiDiDirection textDirection,
 *                      const StyleRun *styleRuns, int styleRunCount,
 *                      int lineWidth,
 *                      UErrorCode *pErrorCode) {
 *     UBiDi *para;
 *
 *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
 *         return;
 *     }
 *
 *     para=ubidi_openSized(length, 0, pErrorCode);
 *     if(para==NULL) { return; }
 *
 *     ubidi_setPara(para, text, length,
 *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
 *                   NULL, pErrorCode);
 *     if(U_SUCCESS(*pErrorCode)) {
 *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
 *         StyleRun styleRun={ length, styleNormal };
 *         int width;
 *
 *         if(styleRuns==NULL || styleRunCount<=0) {
 *            styleRunCount=1;
 *             styleRuns=&styleRun;
 *         }
 *
 *        // assume styleRuns[styleRunCount-1].limit>=length
 *
 *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
 *         if(width<=lineWidth) {
 *             // everything fits onto one line
 *
 *            // prepare rendering a new line from either left or right
 *             startLine(paraLevel, width);
 *
 *             renderLine(para, text, 0, length,
 *                        styleRuns, styleRunCount, pErrorCode);
 *         } else {
 *             UBiDi *line;
 *
 *             // we need to render several lines
 *             line=ubidi_openSized(length, 0, pErrorCode);
 *             if(line!=NULL) {
 *                 int32_t start=0, limit;
 *                 int styleRunStart=0, styleRunLimit;
 *
 *                 for(;;) {
 *                     limit=length;
 *                     styleRunLimit=styleRunCount;
 *                     getLineBreak(text, start, &limit, para,
 *                                  styleRuns, styleRunStart, &styleRunLimit,
 *                                 &width);
 *                     ubidi_setLine(para, start, limit, line, pErrorCode);
 *                     if(U_SUCCESS(*pErrorCode)) {
 *                         // prepare rendering a new line
 *                         // from either left or right
 *                         startLine(paraLevel, width);
 *
 *                         renderLine(line, text, start, limit,
 *                                    styleRuns+styleRunStart,
 *                                    styleRunLimit-styleRunStart, pErrorCode);
 *                     }
 *                     if(limit==length) { break; }
 *                     start=limit;
 *                     styleRunStart=styleRunLimit-1;
 *                     if(start>=styleRuns[styleRunStart].limit) {
 *                         ++styleRunStart;
 *                     }
 *                 }
 *
 *                 ubidi_close(line);
 *             }
 *        }
 *    }
 *
 *     ubidi_close(para);
 *}
 *\endcode
 * </pre>
 */

/*DOCXX_TAG*/
/*@{*/

/**
 * UBiDiLevel is the type of the level values in this
 * Bidi implementation.
 * It holds an embedding level and indicates the visual direction
 * by its bit&nbsp;0 (even/odd value).<p>
 *
 * It can also hold non-level values for the
 * <code>paraLevel</code> and <code>embeddingLevels</code>
 * arguments of <code>ubidi_setPara()</code>; there:
 * <ul>
 * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
 * value indicates whether the using application is
 * specifying the level of a character to <i>override</i> whatever the
 * Bidi implementation would resolve it to.</li>
 * <li><code>paraLevel</code> can be set to the
 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
 * and <code>UBIDI_DEFAULT_RTL</code>.</li>
 * </ul>
 *
 * @see ubidi_setPara
 *
 * <p>The related constants are not real, valid level values.
 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
 * a default for the paragraph level for
 * when the <code>ubidi_setPara()</code> function
 * shall determine it but there is no
 * strongly typed character in the input.<p>
 *
 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
 * just like with normal LTR and RTL level values -
 * these special values are designed that way. Also, the implementation
 * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
 *
 * Note: The numeric values of the related constants will not change:
 * They are tied to the use of 7-bit byte values (plus the override bit)
 * and of the UBiDiLevel=uint8_t data type in this API.
 *
 * @see UBIDI_DEFAULT_LTR
 * @see UBIDI_DEFAULT_RTL
 * @see UBIDI_LEVEL_OVERRIDE
 * @see UBIDI_MAX_EXPLICIT_LEVEL
 * @stable ICU 2.0
 */
typedef uint8_t UBiDiLevel;

/** Paragraph level setting.<p>
 *
 * Constant indicating that the base direction depends on the first strong
 * directional character in the text according to the Unicode Bidirectional
 * Algorithm. If no strong directional character is present,
 * then set the paragraph level to 0 (left-to-right).<p>
 *
 * If this value is used in conjunction with reordering modes
 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 * is assumed to be visual LTR, and the text after reordering is required
 * to be the corresponding logical string with appropriate contextual
 * direction. The direction of the result string will be RTL if either
 * the righmost or leftmost strong character of the source text is RTL
 * or Arabic Letter, the direction will be LTR otherwise.<p>
 *
 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
 * be added at the beginning of the result string to ensure round trip
 * (that the result string, when reordered back to visual, will produce
 * the original source text).
 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 * @stable ICU 2.0
 */
#define UBIDI_DEFAULT_LTR 0xfe

/** Paragraph level setting.<p>
 *
 * Constant indicating that the base direction depends on the first strong
 * directional character in the text according to the Unicode Bidirectional
 * Algorithm. If no strong directional character is present,
 * then set the paragraph level to 1 (right-to-left).<p>
 *
 * If this value is used in conjunction with reordering modes
 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 * is assumed to be visual LTR, and the text after reordering is required
 * to be the corresponding logical string with appropriate contextual
 * direction. The direction of the result string will be RTL if either
 * the righmost or leftmost strong character of the source text is RTL
 * or Arabic Letter, or if the text contains no strong character;
 * the direction will be LTR otherwise.<p>
 *
 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
 * be added at the beginning of the result string to ensure round trip
 * (that the result string, when reordered back to visual, will produce
 * the original source text).
 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 * @stable ICU 2.0
 */
#define UBIDI_DEFAULT_RTL 0xff

/**
 * Maximum explicit embedding level.
 * Same as the max_depth value in the
 * <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
 * @stable ICU 2.0
 */
#define UBIDI_MAX_EXPLICIT_LEVEL 125

/** Bit flag for level input.
 *  Overrides directional properties.
 * @stable ICU 2.0
 */
#define UBIDI_LEVEL_OVERRIDE 0x80

/**
 * Special value which can be returned by the mapping functions when a logical
 * index has no corresponding visual index or vice-versa. This may happen
 * for the logical-to-visual mapping of a Bidi control when option
 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
 * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
 * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
 * @see ubidi_getVisualIndex
 * @see ubidi_getVisualMap
 * @see ubidi_getLogicalIndex
 * @see ubidi_getLogicalMap
 * @stable ICU 3.6
 */
#define UBIDI_MAP_NOWHERE   (-1)

/**
 * <code>UBiDiDirection</code> values indicate the text direction.
 * @stable ICU 2.0
 */
enum UBiDiDirection {
  /** Left-to-right text. This is a 0 value.
   * <ul>
   * <li>As return value for <code>ubidi_getDirection()</code>, it means
   *     that the source string contains no right-to-left characters, or
   *     that the source string is empty and the paragraph level is even.
   * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
   *      means that the first strong character of the source string has
   *      a left-to-right direction.
   * </ul>
   * @stable ICU 2.0
   */
  UBIDI_LTR,
  /** Right-to-left text. This is a 1 value.
   * <ul>
   * <li>As return value for <code>ubidi_getDirection()</code>, it means
   *     that the source string contains no left-to-right characters, or
   *     that the source string is empty and the paragraph level is odd.
   * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
   *      means that the first strong character of the source string has
   *      a right-to-left direction.
   * </ul>
   * @stable ICU 2.0
   */
  UBIDI_RTL,
  /** Mixed-directional text.
   * <p>As return value for <code>ubidi_getDirection()</code>, it means
   *    that the source string contains both left-to-right and
   *    right-to-left characters.
   * @stable ICU 2.0
   */
  UBIDI_MIXED,
  /** No strongly directional text.
   * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means
   *    that the source string is missing or empty, or contains neither left-to-right
   *    nor right-to-left characters.
   * @stable ICU 4.6
   */
  UBIDI_NEUTRAL
};

/** @stable ICU 2.0 */
typedef enum UBiDiDirection UBiDiDirection;

/**
 * Forward declaration of the <code>UBiDi</code> structure for the declaration of
 * the API functions. Its fields are implementation-specific.<p>
 * This structure holds information about a paragraph (or multiple paragraphs)
 * of text with Bidi-algorithm-related details, or about one line of
 * such a paragraph.<p>
 * Reordering can be done on a line, or on one or more paragraphs which are
 * then interpreted each as one single line.
 * @stable ICU 2.0
 */
struct UBiDi;

/** @stable ICU 2.0 */
typedef struct UBiDi UBiDi;

/**
 * Allocate a <code>UBiDi</code> structure.
 * Such an object is initially empty. It is assigned
 * the Bidi properties of a piece of text containing one or more paragraphs
 * by <code>ubidi_setPara()</code>
 * or the Bidi properties of a line within a paragraph by
 * <code>ubidi_setLine()</code>.<p>
 * This object can be reused for as long as it is not deallocated
 * by calling <code>ubidi_close()</code>.<p>
 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
 * additional memory for internal structures as necessary.
 *
 * @return An empty <code>UBiDi</code> object.
 * @stable ICU 2.0
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_open(void);

/**
 * Allocate a <code>UBiDi</code> structure with preallocated memory
 * for internal structures.
 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
 * with no arguments, but it also preallocates memory for internal structures
 * according to the sizings supplied by the caller.<p>
 * Subsequent functions will not allocate any more memory, and are thus
 * guaranteed not to fail because of lack of memory.<p>
 * The preallocation can be limited to some of the internal memory
 * by setting some values to 0 here. That means that if, e.g.,
 * <code>maxRunCount</code> cannot be reasonably predetermined and should not
 * be set to <code>maxLength</code> (the only failproof value) to avoid
 * wasting memory, then <code>maxRunCount</code> could be set to 0 here
 * and the internal structures that are associated with it will be allocated
 * on demand, just like with <code>ubidi_open()</code>.
 *
 * @param maxLength is the maximum text or line length that internal memory
 *        will be preallocated for. An attempt to associate this object with a
 *        longer text will fail, unless this value is 0, which leaves the allocation
 *        up to the implementation.
 *
 * @param maxRunCount is the maximum anticipated number of same-level runs
 *        that internal memory will be preallocated for. An attempt to access
 *        visual runs on an object that was not preallocated for as many runs
 *        as the text was actually resolved to will fail,
 *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
 *        The number of runs depends on the actual text and maybe anywhere between
 *        1 and <code>maxLength</code>. It is typically small.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return An empty <code>UBiDi</code> object with preallocated memory.
 * @stable ICU 2.0
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);

/**
 * <code>ubidi_close()</code> must be called to free the memory
 * associated with a UBiDi object.<p>
 *
 * <strong>Important: </strong>
 * A parent <code>UBiDi</code> object must not be destroyed or reused if
 * it still has children.
 * If a <code>UBiDi</code> object has become the <i>child</i>
 * of another one (its <i>parent</i>) by calling
 * <code>ubidi_setLine()</code>, then the child object must
 * be destroyed (closed) or reused (by calling
 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
 * before the parent object.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 *
 * @see ubidi_setPara
 * @see ubidi_setLine
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_close(UBiDi *pBiDi);

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUBiDiPointer
 * "Smart pointer" class, closes a UBiDi via ubidi_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);

U_NAMESPACE_END

#endif

/**
 * Modify the operation of the Bidi algorithm such that it
 * approximates an "inverse Bidi" algorithm. This function
 * must be called before <code>ubidi_setPara()</code>.
 *
 * <p>The normal operation of the Bidi algorithm as described
 * in the Unicode Technical Report is to take text stored in logical
 * (keyboard, typing) order and to determine the reordering of it for visual
 * rendering.
 * Some legacy systems store text in visual order, and for operations
 * with standard, Unicode-based algorithms, the text needs to be transformed
 * to logical order. This is effectively the inverse algorithm of the
 * described Bidi algorithm. Note that there is no standard algorithm for
 * this "inverse Bidi" and that the current implementation provides only an
 * approximation of "inverse Bidi".</p>
 *
 * <p>With <code>isInverse</code> set to <code>true</code>,
 * this function changes the behavior of some of the subsequent functions
 * in a way that they can be used for the inverse Bidi algorithm.
 * Specifically, runs of text with numeric characters will be treated in a
 * special way and may need to be surrounded with LRM characters when they are
 * written in reordered sequence.</p>
 *
 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
 * Since the actual input for "inverse Bidi" is visually ordered text and
 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
 * the runs of the logically ordered output.</p>
 *
 * <p>Calling this function with argument <code>isInverse</code> set to
 * <code>true</code> is equivalent to calling
 * <code>ubidi_setReorderingMode</code> with argument
 * <code>reorderingMode</code>
 * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
 * Calling this function with argument <code>isInverse</code> set to
 * <code>false</code> is equivalent to calling
 * <code>ubidi_setReorderingMode</code> with argument
 * <code>reorderingMode</code>
 * set to <code>#UBIDI_REORDER_DEFAULT</code>.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 *
 * @param isInverse specifies "forward" or "inverse" Bidi operation.
 *
 * @see ubidi_setPara
 * @see ubidi_writeReordered
 * @see ubidi_setReorderingMode
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);

/**
 * Is this Bidi object set to perform the inverse Bidi algorithm?
 * <p>Note: calling this function after setting the reordering mode with
 * <code>ubidi_setReorderingMode</code> will return <code>true</code> if the
 * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
 * <code>false</code> for all other values.</p>
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return true if the Bidi object is set to perform the inverse Bidi algorithm
 * by handling numbers as L.
 *
 * @see ubidi_setInverse
 * @see ubidi_setReorderingMode
 * @stable ICU 2.0
 */

U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi *pBiDi);

/**
 * Specify whether block separators must be allocated level zero,
 * so that successive paragraphs will progress from left to right.
 * This function must be called before <code>ubidi_setPara()</code>.
 * Paragraph separators (B) may appear in the text.  Setting them to level zero
 * means that all paragraph separators (including one possibly appearing
 * in the last text position) are kept in the reordered text after the text
 * that they follow in the source text.
 * When this feature is not enabled, a paragraph separator at the last
 * position of the text before reordering will go to the first position
 * of the reordered text when the paragraph level is odd.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 *
 * @param orderParagraphsLTR specifies whether paragraph separators (B) must
 * receive level 0, so that successive paragraphs progress from left to right.
 *
 * @see ubidi_setPara
 * @stable ICU 3.4
 */
U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);

/**
 * Is this Bidi object set to allocate level 0 to block separators so that
 * successive paragraphs progress from left to right?
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return true if the Bidi object is set to allocate level 0 to block
 *         separators.
 *
 * @see ubidi_orderParagraphsLTR
 * @stable ICU 3.4
 */
U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);

/**
 * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
 * algorithm to use.
 *
 * @see ubidi_setReorderingMode
 * @stable ICU 3.6
 */
typedef enum UBiDiReorderingMode {
    /** Regular Logical to Visual Bidi algorithm according to Unicode.
      * This is a 0 value.
      * @stable ICU 3.6 */
    UBIDI_REORDER_DEFAULT = 0,
    /** Logical to Visual algorithm which handles numbers in a way which
      * mimics the behavior of Windows XP.
      * @stable ICU 3.6 */
    UBIDI_REORDER_NUMBERS_SPECIAL,
    /** Logical to Visual algorithm grouping numbers with adjacent R characters
      * (reversible algorithm).
      * @stable ICU 3.6 */
    UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
    /** Reorder runs only to transform a Logical LTR string to the Logical RTL
      * string with the same display, or vice-versa.<br>
      * If this mode is set together with option
      * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
      * text may be removed and other controls may be added to produce the
      * minimum combination which has the required display.
      * @stable ICU 3.6 */
    UBIDI_REORDER_RUNS_ONLY,
    /** Visual to Logical algorithm which handles numbers like L
      * (same algorithm as selected by <code>ubidi_setInverse(true)</code>.
      * @see ubidi_setInverse
      * @stable ICU 3.6 */
    UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
    /** Visual to Logical algorithm equivalent to the regular Logical to Visual
      * algorithm.
      * @stable ICU 3.6 */
    UBIDI_REORDER_INVERSE_LIKE_DIRECT,
    /** Inverse Bidi (Visual to Logical) algorithm for the
      * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
      * @stable ICU 3.6 */
    UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * Number of values for reordering mode.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UBIDI_REORDER_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UBiDiReorderingMode;

/**
 * Modify the operation of the Bidi algorithm such that it implements some
 * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
 * algorithm, depending on different values of the "reordering mode".
 * This function must be called before <code>ubidi_setPara()</code>, and stays
 * in effect until called again with a different argument.
 *
 * <p>The normal operation of the Bidi algorithm as described
 * in the Unicode Standard Annex #9 is to take text stored in logical
 * (keyboard, typing) order and to determine how to reorder it for visual
 * rendering.</p>
 *
 * <p>With the reordering mode set to a value other than
 * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
 * some of the subsequent functions in a way such that they implement an
 * inverse Bidi algorithm or some other algorithm variants.</p>
 *
 * <p>Some legacy systems store text in visual order, and for operations
 * with standard, Unicode-based algorithms, the text needs to be transformed
 * into logical order. This is effectively the inverse algorithm of the
 * described Bidi algorithm. Note that there is no standard algorithm for
 * this "inverse Bidi", so a number of variants are implemented here.</p>
 *
 * <p>In other cases, it may be desirable to emulate some variant of the
 * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
 * Logical to Logical transformation.</p>
 *
 * <ul>
 * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
 * the standard Bidi Logical to Visual algorithm is applied.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
 * the algorithm used to perform Bidi transformations when calling
 * <code>ubidi_setPara</code> should approximate the algorithm used in
 * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
 * algorithm.
 * <br>
 * The differences between the basic algorithm and the algorithm addressed
 * by this option are as follows:
 * <ul>
 *   <li>Within text at an even embedding level, the sequence "123AB"
 *   (where AB represent R or AL letters) is transformed to "123BA" by the
 *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
 *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
 *   like regular numbers (EN).</li>
 * </ul></li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
 * numbers located between LTR text and RTL text are associated with the RTL
 * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
 * upper case letters represent RTL characters) will be transformed to
 * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
 * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
 * This makes the algorithm reversible and makes it useful when round trip
 * (from visual to logical and back to visual) must be achieved without
 * adding LRM characters. However, this is a variation from the standard
 * Unicode Bidi algorithm.<br>
 * The source text should not contain Bidi control characters other than LRM
 * or RLM.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
 * a "Logical to Logical" transformation must be performed:
 * <ul>
 * <li>If the default text level of the source text (argument <code>paraLevel</code>
 * in <code>ubidi_setPara</code>) is even, the source text will be handled as
 * LTR logical text and will be transformed to the RTL logical text which has
 * the same LTR visual display.</li>
 * <li>If the default level of the source text is odd, the source text
 * will be handled as RTL logical text and will be transformed to the
 * LTR logical text which has the same LTR visual display.</li>
 * </ul>
 * This mode may be needed when logical text which is basically Arabic or
 * Hebrew, with possible included numbers or phrases in English, has to be
 * displayed as if it had an even embedding level (this can happen if the
 * displaying application treats all text as if it was basically LTR).
 * <br>
 * This mode may also be needed in the reverse case, when logical text which is
 * basically English, with possible included phrases in Arabic or Hebrew, has to
 * be displayed as if it had an odd embedding level.
 * <br>
 * Both cases could be handled by adding LRE or RLE at the head of the text,
 * if the display subsystem supports these formatting controls. If it does not,
 * the problem may be handled by transforming the source text in this mode
 * before displaying it, so that it will be displayed properly.<br>
 * The source text should not contain Bidi control characters other than LRM
 * or RLM.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
 * is applied.
 * Runs of text with numeric characters will be treated like LTR letters and
 * may need to be surrounded with LRM characters when they are written in
 * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
 * be used with function <code>ubidi_writeReordered</code> to this end. This
 * mode is equivalent to calling <code>ubidi_setInverse()</code> with
 * argument <code>isInverse</code> set to <code>true</code>.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
 * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
 * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
 * but is closer to the regular Bidi algorithm.
 * <br>
 * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
 * upper case represents RTL characters) will be transformed to
 * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
 * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
 * When used in conjunction with option
 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
 * adds Bidi marks to the output significantly more sparingly than mode
 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
 * <code>ubidi_writeReordered</code>.</li>
 *
 * <li>When the reordering mode is set to
 * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
 * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
 * <br>
 * For example, an LTR paragraph with the content "abc FED123" (where
 * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
 * </ul>
 *
 * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
 * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
 * output runs should be retrieved using
 * <code>ubidi_getVisualRun()</code>, and the output text with
 * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
 * "inverse Bidi" modes the input is actually visually ordered text and
 * reordered output returned by <code>ubidi_getVisualRun()</code> or
 * <code>ubidi_writeReordered()</code> are actually runs or character string
 * of logically ordered output.<br>
 * For all the "inverse Bidi" modes, the source text should not contain
 * Bidi control characters other than LRM or RLM.</p>
 *
 * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
 * <code>ubidi_writeReordered</code> has no useful meaning and should not be
 * used in conjunction with any value of the reordering mode specifying
 * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @param reorderingMode specifies the required variant of the Bidi algorithm.
 *
 * @see UBiDiReorderingMode
 * @see ubidi_setInverse
 * @see ubidi_setPara
 * @see ubidi_writeReordered
 * @stable ICU 3.6
 */
U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);

/**
 * What is the requested reordering mode for a given Bidi object?
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return the current reordering mode of the Bidi object
 * @see ubidi_setReorderingMode
 * @stable ICU 3.6
 */
U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi *pBiDi);

/**
 * <code>UBiDiReorderingOption</code> values indicate which options are
 * specified to affect the Bidi algorithm.
 *
 * @see ubidi_setReorderingOptions
 * @stable ICU 3.6
 */
typedef enum UBiDiReorderingOption {
    /**
     * option value for <code>ubidi_setReorderingOptions</code>:
     * disable all the options which can be set with this function
     * @see ubidi_setReorderingOptions
     * @stable ICU 3.6
     */
    UBIDI_OPTION_DEFAULT = 0,

    /**
     * option bit for <code>ubidi_setReorderingOptions</code>:
     * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
     * a reordering to a Logical order
     *
     * <p>This option must be set or reset before calling
     * <code>ubidi_setPara</code>.</p>
     *
     * <p>This option is significant only with reordering modes which generate
     * a result with Logical order, specifically:</p>
     * <ul>
     *   <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
     *   <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
     *   <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
     *   <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
     * </ul>
     *
     * <p>If this option is set in conjunction with reordering mode
     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
     * <code>ubidi_setInverse(true)</code>, it implies
     * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
     * in calls to function <code>ubidi_writeReordered()</code>.</p>
     *
     * <p>For other reordering modes, a minimum number of LRM or RLM characters
     * will be added to the source text after reordering it so as to ensure
     * round trip, i.e. when applying the inverse reordering mode on the
     * resulting logical text with removal of Bidi marks
     * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
     * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
     * in <code>ubidi_writeReordered</code>), the result will be identical to the
     * source text in the first transformation.
     *
     * <p>This option will be ignored if specified together with option
     * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
     * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
     * <code>ubidi_writeReordered()</code> and it implies option
     * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
     * <code>ubidi_writeReordered()</code> if the reordering mode is
     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
     *
     * @see ubidi_setReorderingMode
     * @see ubidi_setReorderingOptions
     * @stable ICU 3.6
     */
    UBIDI_OPTION_INSERT_MARKS = 1,

    /**
     * option bit for <code>ubidi_setReorderingOptions</code>:
     * remove Bidi control characters
     *
     * <p>This option must be set or reset before calling
     * <code>ubidi_setPara</code>.</p>
     *
     * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
     * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
     * to function <code>ubidi_writeReordered()</code> and it implies option
     * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
     *
     * @see ubidi_setReorderingMode
     * @see ubidi_setReorderingOptions
     * @stable ICU 3.6
     */
    UBIDI_OPTION_REMOVE_CONTROLS = 2,

    /**
     * option bit for <code>ubidi_setReorderingOptions</code>:
     * process the output as part of a stream to be continued
     *
     * <p>This option must be set or reset before calling
     * <code>ubidi_setPara</code>.</p>
     *
     * <p>This option specifies that the caller is interested in processing large
     * text object in parts.
     * The results of the successive calls are expected to be concatenated by the
     * caller. Only the call for the last part will have this option bit off.</p>
     *
     * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
     * less than the full source text in order to truncate the text at a meaningful
     * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
     * immediately after calling <code>ubidi_setPara()</code> in order to
     * determine how much of the source text has been processed.
     * Source text beyond that length should be resubmitted in following calls to
     * <code>ubidi_setPara</code>. The processed length may be less than
     * the length of the source text if a character preceding the last character of
     * the source text constitutes a reasonable boundary (like a block separator)
     * for text to be continued.<br>
     * If the last character of the source text constitutes a reasonable
     * boundary, the whole text will be processed at once.<br>
     * If nowhere in the source text there exists
     * such a reasonable boundary, the processed length will be zero.<br>
     * The caller should check for such an occurrence and do one of the following:
     * <ul><li>submit a larger amount of text with a better chance to include
     *         a reasonable boundary.</li>
     *     <li>resubmit the same text after turning off option
     *         <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
     * In all cases, this option should be turned off before processing the last
     * part of the text.</p>
     *
     * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
     * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
     * argument <code>orderParagraphsLTR</code> set to <code>true</code> before
     * calling <code>ubidi_setPara</code> so that later paragraphs may be
     * concatenated to previous paragraphs on the right.</p>
     *
     * @see ubidi_setReorderingMode
     * @see ubidi_setReorderingOptions
     * @see ubidi_getProcessedLength
     * @see ubidi_orderParagraphsLTR
     * @stable ICU 3.6
     */
    UBIDI_OPTION_STREAMING = 4
} UBiDiReorderingOption;

/**
 * Specify which of the reordering options
 * should be applied during Bidi transformations.
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @param reorderingOptions is a combination of zero or more of the following
 * options:
 * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
 *
 * @see ubidi_getReorderingOptions
 * @stable ICU 3.6
 */
U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);

/**
 * What are the reordering options applied to a given Bidi object?
 *
 * @param pBiDi is a <code>UBiDi</code> object.
 * @return the current reordering options of the Bidi object
 * @see ubidi_setReorderingOptions
 * @stable ICU 3.6
 */
U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi *pBiDi);

/**
 * Set the context before a call to ubidi_setPara().<p>
 *
 * ubidi_setPara() computes the left-right directionality for a given piece
 * of text which is supplied as one of its arguments. Sometimes this piece
 * of text (the "main text") should be considered in context, because text
 * appearing before ("prologue") and/or after ("epilogue") the main text
 * may affect the result of this computation.<p>
 *
 * This function specifies the prologue and/or the epilogue for the next
 * call to ubidi_setPara(). The characters specified as prologue and
 * epilogue should not be modified by the calling program until the call
 * to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
 * all need specification of a context, ubidi_setContext() must be called
 * before each call to ubidi_setPara(). In other words, a context is not
 * "remembered" after the following successful call to ubidi_setPara().<p>
 *
 * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
 * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
 * ubidi_setContext() which specifies a prologue, the paragraph level will
 * be computed taking in consideration the text in the prologue.<p>
 *
 * When ubidi_setPara() is called without a previous call to
 * ubidi_setContext, the main text is handled as if preceded and followed
 * by strong directional characters at the current paragraph level.
 * Calling ubidi_setContext() with specification of a prologue will change
 * this behavior by handling the main text as if preceded by the last
 * strong character appearing in the prologue, if any.
 * Calling ubidi_setContext() with specification of an epilogue will change
 * the behavior of ubidi_setPara() by handling the main text as if followed
 * by the first strong character or digit appearing in the epilogue, if any.<p>
 *
 * Note 1: if <code>ubidi_setContext</code> is called repeatedly without
 *         calling <code>ubidi_setPara</code>, the earlier calls have no effect,
 *         only the last call will be remembered for the next call to
 *         <code>ubidi_setPara</code>.<p>
 *
 * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code>
 *         cancels any previous setting of non-empty prologue or epilogue.
 *         The next call to <code>ubidi_setPara()</code> will process no
 *         prologue or epilogue.<p>
 *
 * Note 3: users must be aware that even after setting the context
 *         before a call to ubidi_setPara() to perform e.g. a logical to visual
 *         transformation, the resulting string may not be identical to what it
 *         would have been if all the text, including prologue and epilogue, had
 *         been processed together.<br>
 * Example (upper case letters represent RTL characters):<br>
 * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
 * &nbsp;&nbsp;epilogue = none<br>
 * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
 * &nbsp;&nbsp;paraLevel = UBIDI_LTR<br>
 * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
 *             ("HGF" is adjacent to "xyz")<br>
 * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
 *             ("HGF" is not adjacent to "xyz")<br>
 *
 * @param pBiDi is a paragraph <code>UBiDi</code> object.
 *
 * @param prologue is a pointer to the text which precedes the text that
 *        will be specified in a coming call to ubidi_setPara().
 *        If there is no prologue to consider, then <code>proLength</code>
 *        must be zero and this pointer can be NULL.
 *
 * @param proLength is the length of the prologue; if <code>proLength==-1</code>
 *        then the prologue must be zero-terminated.
 *        Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means
 *        that there is no prologue to consider.
 *
 * @param epilogue is a pointer to the text which follows the text that
 *        will be specified in a coming call to ubidi_setPara().
 *        If there is no epilogue to consider, then <code>epiLength</code>
 *        must be zero and this pointer can be NULL.
 *
 * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code>
 *        then the epilogue must be zero-terminated.
 *        Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means
 *        that there is no epilogue to consider.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @see ubidi_setPara
 * @stable ICU 4.8
 */
U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi *pBiDi,
                 const UChar *prologue, int32_t proLength,
                 const UChar *epilogue, int32_t epiLength,
                 UErrorCode *pErrorCode);

/**
 * Perform the Unicode Bidi algorithm. It is defined in the
 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
 * version 13,
 * also described in The Unicode Standard, Version 4.0 .<p>
 *
 * This function takes a piece of plain text containing one or more paragraphs,
 * with or without externally specified embedding levels from <i>styled</i>
 * text and computes the left-right-directionality of each character.<p>
 *
 * If the entire text is all of the same directionality, then
 * the function may not perform all the steps described by the algorithm,
 * i.e., some levels may not be the same as if all steps were performed.
 * This is not relevant for unidirectional text.<br>
 * For example, in pure LTR text with numbers the numbers would get
 * a resolved level of 2 higher than the surrounding text according to
 * the algorithm. This implementation may set all resolved levels to
 * the same value in such a case.<p>
 *
 * The text can be composed of multiple paragraphs. Occurrence of a block
 * separator in the text terminates a paragraph, and whatever comes next starts
 * a new paragraph. The exception to this rule is when a Carriage Return (CR)
 * is followed by a Line Feed (LF). Both CR and LF are block separators, but
 * in that case, the pair of characters is considered as terminating the
 * preceding paragraph, and a new paragraph will be started by a character
 * coming after the LF.
 *
 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
 *        which will be set to contain the reordering information,
 *        especially the resolved levels for all the characters in <code>text</code>.
 *
 * @param text is a pointer to the text that the Bidi algorithm will be performed on.
 *        This pointer is stored in the UBiDi object and can be retrieved
 *        with <code>ubidi_getText()</code>.<br>
 *        <strong>Note:</strong> the text must be (at least) <code>length</code> long.
 *
 * @param length is the length of the text; if <code>length==-1</code> then
 *        the text must be zero-terminated.
 *
 * @param paraLevel specifies the default level for the text;
 *        it is typically 0 (LTR) or 1 (RTL).
 *        If the function shall determine the paragraph level from the text,
 *        then <code>paraLevel</code> can be set to
 *        either <code>#UBIDI_DEFAULT_LTR</code>
 *        or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
 *        paragraphs, the paragraph level shall be determined separately for
 *        each paragraph; if a paragraph does not include any strongly typed
 *        character, then the desired default is used (0 for LTR or 1 for RTL).
 *        Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
 *        is also valid, with odd levels indicating RTL.
 *
 * @param embeddingLevels (in) may be used to preset the embedding and override levels,
 *        ignoring characters like LRE and PDF in the text.
 *        A level overrides the directional property of its corresponding
 *        (same index) character if the level has the
 *        <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
 *        Aside from that bit, it must be
 *        <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
 *        except that level 0 is always allowed.
 *        Level 0 for a paragraph separator prevents reordering of paragraphs;
 *        this only works reliably if <code>#UBIDI_LEVEL_OVERRIDE</code>
 *        is also set for paragraph separators.
 *        Level 0 for other characters is treated as a wildcard
 *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
 *        <strong>Caution: </strong>A copy of this pointer, not of the levels,
 *        will be stored in the <code>UBiDi</code> object;
 *        the <code>embeddingLevels</code> array must not be
 *        deallocated before the <code>UBiDi</code> structure is destroyed or reused,
 *        and the <code>embeddingLevels</code>
 *        should not be modified to avoid unexpected results on subsequent Bidi operations.
 *        However, the <code>ubidi_setPara()</code> and
 *        <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
 *        After the <code>UBiDi</code> object is reused or destroyed, the caller
 *        must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
 *        <strong>Note:</strong> the <code>embeddingLevels</code> array must be
 *        at least <code>length</code> long.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
              UErrorCode *pErrorCode);

/**
 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
 * contain the reordering information, especially the resolved levels,
 * for all the characters in a line of text. This line of text is
 * specified by referring to a <code>UBiDi</code> object representing
 * this information for a piece of text containing one or more paragraphs,
 * and by specifying a range of indexes in this text.<p>
 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
 *
 * This is used after calling <code>ubidi_setPara()</code>
 * for a piece of text, and after line-breaking on that text.
 * It is not necessary if each paragraph is treated as a single line.<p>
 *
 * After line-breaking, rules (L1) and (L2) for the treatment of
 * trailing WS and for reordering are performed on
 * a <code>UBiDi</code> object that represents a line.<p>
 *
 * <strong>Important: </strong><code>pLineBiDi</code> shares data with
 * <code>pParaBiDi</code>.
 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
 * before the object for its parent paragraph.<p>
 *
 * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
 * and <code>start</code> is added to it so that it points to the beginning of the
 * line for this object.
 *
 * @param pParaBiDi is the parent paragraph object. It must have been set
 * by a successful call to ubidi_setPara.
 *
 * @param start is the line's first index into the text.
 *
 * @param limit is just behind the line's last index into the text
 *        (its last index +1).<br>
 *        It must be <code>0<=start<limit<=</code>containing paragraph limit.
 *        If the specified line crosses a paragraph boundary, the function
 *        will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param pLineBiDi is the object that will now represent a line of the text.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @see ubidi_setPara
 * @see ubidi_getProcessedLength
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_setLine(const UBiDi *pParaBiDi,
              int32_t start, int32_t limit,
              UBiDi *pLineBiDi,
              UErrorCode *pErrorCode);

/**
 * Get the directionality of the text.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
 *         or <code>UBIDI_MIXED</code>
 *         that indicates if the entire text
 *         represented by this object is unidirectional,
 *         and which direction, or if it is mixed-directional.
 * Note -  The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
 *
 * @see UBiDiDirection
 * @stable ICU 2.0
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi);

/**
 * Gets the base direction of the text provided according
 * to the Unicode Bidirectional Algorithm. The base direction
 * is derived from the first character in the string with bidirectional
 * character type L, R, or AL. If the first such character has type L,
 * <code>UBIDI_LTR</code> is returned. If the first such character has
 * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
 * not contain any character of these types, then
 * <code>UBIDI_NEUTRAL</code> is returned.
 *
 * This is a lightweight function for use when only the base direction
 * is needed and no further bidi processing of the text is needed.
 *
 * @param text is a pointer to the text whose base
 *             direction is needed.
 * Note: the text must be (at least) @c length long.
 *
 * @param length is the length of the text;
 *               if <code>length==-1</code> then the text
 *               must be zero-terminated.
 *
 * @return  <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
 *          <code>UBIDI_NEUTRAL</code>
 *
 * @see UBiDiDirection
 * @stable ICU 4.6
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar *text,  int32_t length );

/**
 * Get the pointer to the text.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @return The pointer to the text that the UBiDi object was created for.
 *
 * @see ubidi_setPara
 * @see ubidi_setLine
 * @stable ICU 2.0
 */
U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi *pBiDi);

/**
 * Get the length of the text.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @return The length of the text that the UBiDi object was created for.
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi *pBiDi);

/**
 * Get the paragraph level of the text.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @return The paragraph level. If there are multiple paragraphs, their
 *         level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
 *         UBIDI_DEFAULT_RTL.  In that case, the level of the first paragraph
 *         is returned.
 *
 * @see UBiDiLevel
 * @see ubidi_getParagraph
 * @see ubidi_getParagraphByIndex
 * @stable ICU 2.0
 */
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi *pBiDi);

/**
 * Get the number of paragraphs.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @return The number of paragraphs.
 * @stable ICU 3.4
 */
U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi *pBiDi);

/**
 * Get a paragraph, given a position within the text.
 * This function returns information about a paragraph.<br>
 * Note: if the paragraph index is known, it is more efficient to
 * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param charIndex is the index of a character within the text, in the
 *        range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
 *
 * @param pParaStart will receive the index of the first character of the
 *        paragraph in the text.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pParaLimit will receive the limit of the paragraph.
 *        The l-value that you point to here may be the
 *        same expression (variable) as the one for
 *        <code>charIndex</code>.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pParaLevel will receive the level of the paragraph.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The index of the paragraph containing the specified position.
 *
 * @see ubidi_getProcessedLength
 * @stable ICU 3.4
 */
U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
                   int32_t *pParaLimit, UBiDiLevel *pParaLevel,
                   UErrorCode *pErrorCode);

/**
 * Get a paragraph, given the index of this paragraph.
 *
 * This function returns information about a paragraph.<p>
 *
 * @param pBiDi is the paragraph <code>UBiDi</code> object.
 *
 * @param paraIndex is the number of the paragraph, in the
 *        range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
 *
 * @param pParaStart will receive the index of the first character of the
 *        paragraph in the text.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pParaLimit will receive the limit of the paragraph.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pParaLevel will receive the level of the paragraph.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @stable ICU 3.4
 */
U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
                          int32_t *pParaStart, int32_t *pParaLimit,
                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);

/**
 * Get the level for one character.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param charIndex the index of a character. It must be in the range
 *         [0..ubidi_getProcessedLength(pBiDi)].
 *
 * @return The level for the character at charIndex (0 if charIndex is not
 *         in the valid range).
 *
 * @see UBiDiLevel
 * @see ubidi_getProcessedLength
 * @stable ICU 2.0
 */
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);

/**
 * Get an array of levels for each character.<p>
 *
 * Note that this function may allocate memory under some
 * circumstances, unlike <code>ubidi_getLevelAt()</code>.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
 *        text length must be strictly positive.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The levels array for the text,
 *         or <code>NULL</code> if an error occurs.
 *
 * @see UBiDiLevel
 * @see ubidi_getProcessedLength
 * @stable ICU 2.0
 */
U_CAPI const UBiDiLevel * U_EXPORT2
ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);

/**
 * Get a logical run.
 * This function returns information about a run and is used
 * to retrieve runs in logical order.<p>
 * This is especially useful for line-breaking on a paragraph.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param logicalPosition is a logical position within the source text.
 *
 * @param pLogicalLimit will receive the limit of the corresponding run.
 *        The l-value that you point to here may be the
 *        same expression (variable) as the one for
 *        <code>logicalPosition</code>.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @param pLevel will receive the level of the corresponding run.
 *        This pointer can be <code>NULL</code> if this
 *        value is not necessary.
 *
 * @see ubidi_getProcessedLength
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
                    int32_t *pLogicalLimit, UBiDiLevel *pLevel);

/**
 * Get the number of runs.
 * This function may invoke the actual reordering on the
 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
 * may have resolved only the levels of the text. Therefore,
 * <code>ubidi_countRuns()</code> may have to allocate memory,
 * and may fail doing so.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The number of runs.
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);

/**
 * Get one run's logical start, length, and directionality,
 * which can be 0 for LTR or 1 for RTL.
 * In an RTL run, the character at the logical start is
 * visually on the right of the displayed run.
 * The length is the number of characters in the run.<p>
 * <code>ubidi_countRuns()</code> should be called
 * before the runs are retrieved.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param runIndex is the number of the run in visual order, in the
 *        range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
 *
 * @param pLogicalStart is the first logical character index in the text.
 *        The pointer may be <code>NULL</code> if this index is not needed.
 *
 * @param pLength is the number of characters (at least one) in the run.
 *        The pointer may be <code>NULL</code> if this is not needed.
 *
 * @return the directionality of the run,
 *         <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
 *         never <code>UBIDI_MIXED</code>,
 *         never <code>UBIDI_NEUTRAL</code>.
 *
 * @see ubidi_countRuns
 *
 * Example:
 * <pre>
 * \code
 * int32_t i, count=ubidi_countRuns(pBiDi),
 *         logicalStart, visualIndex=0, length;
 * for(i=0; i<count; ++i) {
 *    if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
 *         do { // LTR
 *             show_char(text[logicalStart++], visualIndex++);
 *         } while(--length>0);
 *     } else {
 *         logicalStart+=length;  // logicalLimit
 *         do { // RTL
 *             show_char(text[--logicalStart], visualIndex++);
 *         } while(--length>0);
 *     }
 * }
 *\endcode
 * </pre>
 *
 * Note that in right-to-left runs, code like this places
 * second surrogates before first ones (which is generally a bad idea)
 * and combining characters before base characters.
 * <p>
 * Use of <code>ubidi_writeReordered()</code>, optionally with the
 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
 * to avoid these issues.
 * @stable ICU 2.0
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
                   int32_t *pLogicalStart, int32_t *pLength);

/**
 * Get the visual position from a logical text position.
 * If such a mapping is used many times on the same
 * <code>UBiDi</code> object, then calling
 * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
 *
 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
 * visual position because the corresponding text character is a Bidi control
 * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
 * <p>
 * When the visual output is altered by using options of
 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
 * be correct. It is advised to use, when possible, reordering options
 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
 * <p>
 * Note that in right-to-left runs, this mapping places
 * second surrogates before first ones (which is generally a bad idea)
 * and combining characters before base characters.
 * Use of <code>ubidi_writeReordered()</code>, optionally with the
 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
 * of using the mapping, in order to avoid these issues.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param logicalIndex is the index of a character in the text.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The visual position of this character.
 *
 * @see ubidi_getLogicalMap
 * @see ubidi_getLogicalIndex
 * @see ubidi_getProcessedLength
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);

/**
 * Get the logical text position from a visual position.
 * If such a mapping is used many times on the same
 * <code>UBiDi</code> object, then calling
 * <code>ubidi_getVisualMap()</code> is more efficient.<p>
 *
 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
 * logical position because the corresponding text character is a Bidi mark
 * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
 * <p>
 * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
 * <p>
 * When the visual output is altered by using options of
 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
 * be correct. It is advised to use, when possible, reordering options
 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param visualIndex is the visual position of a character.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The index of this character in the text.
 *
 * @see ubidi_getVisualMap
 * @see ubidi_getVisualIndex
 * @see ubidi_getResultLength
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);

/**
 * Get a logical-to-visual index map (array) for the characters in the UBiDi
 * (paragraph or line) object.
 * <p>
 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
 * corresponding text characters are Bidi controls removed from the visual
 * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
 * <p>
 * When the visual output is altered by using options of
 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
 * be correct. It is advised to use, when possible, reordering options
 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
 * <p>
 * Note that in right-to-left runs, this mapping places
 * second surrogates before first ones (which is generally a bad idea)
 * and combining characters before base characters.
 * Use of <code>ubidi_writeReordered()</code>, optionally with the
 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
 * of using the mapping, in order to avoid these issues.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
 *        indexes which will reflect the reordering of the characters.
 *        If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
 *        of elements allocated in <code>indexMap</code> must be no less than
 *        <code>ubidi_getResultLength()</code>.
 *        The array does not need to be initialized.<br><br>
 *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @see ubidi_getVisualMap
 * @see ubidi_getVisualIndex
 * @see ubidi_getProcessedLength
 * @see ubidi_getResultLength
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);

/**
 * Get a visual-to-logical index map (array) for the characters in the UBiDi
 * (paragraph or line) object.
 * <p>
 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
 * corresponding text characters are Bidi marks inserted in the visual output
 * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
 * <p>
 * When the visual output is altered by using options of
 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
 * be correct. It is advised to use, when possible, reordering options
 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
 *
 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
 *
 * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
 *        indexes which will reflect the reordering of the characters.
 *        If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
 *        of elements allocated in <code>indexMap</code> must be no less than
 *        <code>ubidi_getProcessedLength()</code>.
 *        The array does not need to be initialized.<br><br>
 *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @see ubidi_getLogicalMap
 * @see ubidi_getLogicalIndex
 * @see ubidi_getProcessedLength
 * @see ubidi_getResultLength
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);

/**
 * This is a convenience function that does not use a UBiDi object.
 * It is intended to be used for when an application has determined the levels
 * of objects (character sequences) and just needs to have them reordered (L2).
 * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
 * <code>UBiDi</code> object.
 *
 * @param levels is an array with <code>length</code> levels that have been determined by
 *        the application.
 *
 * @param length is the number of levels in the array, or, semantically,
 *        the number of objects to be reordered.
 *        It must be <code>length>0</code>.
 *
 * @param indexMap is a pointer to an array of <code>length</code>
 *        indexes which will reflect the reordering of the characters.
 *        The array does not need to be initialized.<p>
 *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);

/**
 * This is a convenience function that does not use a UBiDi object.
 * It is intended to be used for when an application has determined the levels
 * of objects (character sequences) and just needs to have them reordered (L2).
 * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
 * <code>UBiDi</code> object.
 *
 * @param levels is an array with <code>length</code> levels that have been determined by
 *        the application.
 *
 * @param length is the number of levels in the array, or, semantically,
 *        the number of objects to be reordered.
 *        It must be <code>length>0</code>.
 *
 * @param indexMap is a pointer to an array of <code>length</code>
 *        indexes which will reflect the reordering of the characters.
 *        The array does not need to be initialized.<p>
 *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);

/**
 * Invert an index map.
 * The index mapping of the first map is inverted and written to
 * the second one.
 *
 * @param srcMap is an array with <code>length</code> elements
 *        which defines the original mapping from a source array containing
 *        <code>length</code> elements to a destination array.
 *        Some elements of the source array may have no mapping in the
 *        destination array. In that case, their value will be
 *        the special value <code>UBIDI_MAP_NOWHERE</code>.
 *        All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
 *        Some elements may have a value >= <code>length</code>, if the
 *        destination array has more elements than the source array.
 *        There must be no duplicate indexes (two or more elements with the
 *        same value except <code>UBIDI_MAP_NOWHERE</code>).
 *
 * @param destMap is an array with a number of elements equal to 1 + the highest
 *        value in <code>srcMap</code>.
 *        <code>destMap</code> will be filled with the inverse mapping.
 *        If element with index i in <code>srcMap</code> has a value k different
 *        from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
 *        the source array maps to element k in the destination array.
 *        The inverse map will have value i in its k-th element.
 *        For all elements of the destination array which do not map to
 *        an element in the source array, the corresponding element in the
 *        inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
 *
 * @param length is the length of each array.
 * @see UBIDI_MAP_NOWHERE
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);

/** option flags for ubidi_writeReordered() */

/**
 * option bit for ubidi_writeReordered():
 * keep combining characters after their base characters in RTL runs
 *
 * @see ubidi_writeReordered
 * @stable ICU 2.0
 */
#define UBIDI_KEEP_BASE_COMBINING       1

/**
 * option bit for ubidi_writeReordered():
 * replace characters with the "mirrored" property in RTL runs
 * by their mirror-image mappings
 *
 * @see ubidi_writeReordered
 * @stable ICU 2.0
 */
#define UBIDI_DO_MIRRORING              2

/**
 * option bit for ubidi_writeReordered():
 * surround the run with LRMs if necessary;
 * this is part of the approximate "inverse Bidi" algorithm
 *
 * <p>This option does not imply corresponding adjustment of the index
 * mappings.</p>
 *
 * @see ubidi_setInverse
 * @see ubidi_writeReordered
 * @stable ICU 2.0
 */
#define UBIDI_INSERT_LRM_FOR_NUMERIC    4

/**
 * option bit for ubidi_writeReordered():
 * remove Bidi control characters
 * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
 *
 * <p>This option does not imply corresponding adjustment of the index
 * mappings.</p>
 *
 * @see ubidi_writeReordered
 * @stable ICU 2.0
 */
#define UBIDI_REMOVE_BIDI_CONTROLS      8

/**
 * option bit for ubidi_writeReordered():
 * write the output in reverse order
 *
 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
 * first without this option, and then calling
 * <code>ubidi_writeReverse()</code> without mirroring.
 * Doing this in the same step is faster and avoids a temporary buffer.
 * An example for using this option is output to a character terminal that
 * is designed for RTL scripts and stores text in reverse order.</p>
 *
 * @see ubidi_writeReordered
 * @stable ICU 2.0
 */
#define UBIDI_OUTPUT_REVERSE            16

/**
 * Get the length of the source text processed by the last call to
 * <code>ubidi_setPara()</code>. This length may be different from the length
 * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
 * has been set.
 * <br>
 * Note that whenever the length of the text affects the execution or the
 * result of a function, it is the processed length which must be considered,
 * except for <code>ubidi_setPara</code> (which receives unprocessed source
 * text) and <code>ubidi_getLength</code> (which returns the original length
 * of the source text).<br>
 * In particular, the processed length is the one to consider in the following
 * cases:
 * <ul>
 * <li>maximum value of the <code>limit</code> argument of
 * <code>ubidi_setLine</code></li>
 * <li>maximum value of the <code>charIndex</code> argument of
 * <code>ubidi_getParagraph</code></li>
 * <li>maximum value of the <code>charIndex</code> argument of
 * <code>ubidi_getLevelAt</code></li>
 * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
 * <li>maximum value of the <code>logicalStart</code> argument of
 * <code>ubidi_getLogicalRun</code></li>
 * <li>maximum value of the <code>logicalIndex</code> argument of
 * <code>ubidi_getVisualIndex</code></li>
 * <li>number of elements filled in the <code>*indexMap</code> argument of
 * <code>ubidi_getLogicalMap</code></li>
 * <li>length of text processed by <code>ubidi_writeReordered</code></li>
 * </ul>
 *
 * @param pBiDi is the paragraph <code>UBiDi</code> object.
 *
 * @return The length of the part of the source text processed by
 *         the last call to <code>ubidi_setPara</code>.
 * @see ubidi_setPara
 * @see UBIDI_OPTION_STREAMING
 * @stable ICU 3.6
 */
U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi *pBiDi);

/**
 * Get the length of the reordered text resulting from the last call to
 * <code>ubidi_setPara()</code>. This length may be different from the length
 * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
 * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
 * <br>
 * This resulting length is the one to consider in the following cases:
 * <ul>
 * <li>maximum value of the <code>visualIndex</code> argument of
 * <code>ubidi_getLogicalIndex</code></li>
 * <li>number of elements of the <code>*indexMap</code> argument of
 * <code>ubidi_getVisualMap</code></li>
 * </ul>
 * Note that this length stays identical to the source text length if
 * Bidi marks are inserted or removed using option bits of
 * <code>ubidi_writeReordered</code>, or if option
 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
 *
 * @param pBiDi is the paragraph <code>UBiDi</code> object.
 *
 * @return The length of the reordered text resulting from
 *         the last call to <code>ubidi_setPara</code>.
 * @see ubidi_setPara
 * @see UBIDI_OPTION_INSERT_MARKS
 * @see UBIDI_OPTION_REMOVE_CONTROLS
 * @stable ICU 3.6
 */
U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi *pBiDi);

U_CDECL_BEGIN

#ifndef U_HIDE_DEPRECATED_API
/**
 * Value returned by <code>UBiDiClassCallback</code> callbacks when
 * there is no need to override the standard Bidi class for a given code point.
 *
 * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead.
 *
 * @see UBiDiClassCallback
 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
 */
#define U_BIDI_CLASS_DEFAULT  U_CHAR_DIRECTION_COUNT
#endif  // U_HIDE_DEPRECATED_API

/**
 * Callback type declaration for overriding default Bidi class values with
 * custom ones.
 * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
 * object by calling the <code>ubidi_setClassCallback()</code> function;
 * then the callback will be invoked by the UBA implementation any time the
 * class of a character is to be determined.</p>
 *
 * @param context is a pointer to the callback private data.
 *
 * @param c       is the code point to get a Bidi class for.
 *
 * @return The directional property / Bidi class for the given code point
 *         <code>c</code> if the default class has been overridden, or
 *         <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
 *         if the standard Bidi class value for <code>c</code> is to be used.
 * @see ubidi_setClassCallback
 * @see ubidi_getClassCallback
 * @stable ICU 3.6
 */
typedef UCharDirection U_CALLCONV
UBiDiClassCallback(const void *context, UChar32 c);

U_CDECL_END

/**
 * Retrieve the Bidi class for a given code point.
 * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
 * value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
 * that value is used; otherwise the default class determination mechanism is invoked.</p>
 *
 * @param pBiDi is the paragraph <code>UBiDi</code> object.
 *
 * @param c     is the code point whose Bidi class must be retrieved.
 *
 * @return The Bidi class for character <code>c</code> based
 *         on the given <code>pBiDi</code> instance.
 * @see UBiDiClassCallback
 * @stable ICU 3.6
 */
U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);

/**
 * Set the callback function and callback data used by the UBA
 * implementation for Bidi class determination.
 * <p>This may be useful for assigning Bidi classes to PUA characters, or
 * for special application needs. For instance, an application may want to
 * handle all spaces like L or R characters (according to the base direction)
 * when creating the visual ordering of logical lines which are part of a report
 * organized in columns: there should not be interaction between adjacent
 * cells.<p>
 *
 * @param pBiDi is the paragraph <code>UBiDi</code> object.
 *
 * @param newFn is the new callback function pointer.
 *
 * @param newContext is the new callback context pointer. This can be NULL.
 *
 * @param oldFn fillin: Returns the old callback function pointer. This can be
 *                      NULL.
 *
 * @param oldContext fillin: Returns the old callback's context. This can be
 *                           NULL.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @see ubidi_getClassCallback
 * @stable ICU 3.6
 */
U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
                       const void *newContext, UBiDiClassCallback **oldFn,
                       const void **oldContext, UErrorCode *pErrorCode);

/**
 * Get the current callback function used for Bidi class determination.
 *
 * @param pBiDi is the paragraph <code>UBiDi</code> object.
 *
 * @param fn fillin: Returns the callback function pointer.
 *
 * @param context fillin: Returns the callback's private context.
 *
 * @see ubidi_setClassCallback
 * @stable ICU 3.6
 */
U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);

/**
 * Take a <code>UBiDi</code> object containing the reordering
 * information for a piece of text (one or more paragraphs) set by
 * <code>ubidi_setPara()</code> or for a line of text set by
 * <code>ubidi_setLine()</code> and write a reordered string to the
 * destination buffer.
 *
 * This function preserves the integrity of characters with multiple
 * code units and (optionally) combining characters.
 * Characters in RTL runs can be replaced by mirror-image characters
 * in the destination buffer. Note that "real" mirroring has
 * to be done in a rendering engine by glyph selection
 * and that for many "mirrored" characters there are no
 * Unicode characters as mirror-image equivalents.
 * There are also options to insert or remove Bidi control
 * characters; see the description of the <code>destSize</code>
 * and <code>options</code> parameters and of the option bit flags.
 *
 * @param pBiDi A pointer to a <code>UBiDi</code> object that
 *              is set by <code>ubidi_setPara()</code> or
 *              <code>ubidi_setLine()</code> and contains the reordering
 *              information for the text that it was defined for,
 *              as well as a pointer to that text.<br><br>
 *              The text was aliased (only the pointer was stored
 *              without copying the contents) and must not have been modified
 *              since the <code>ubidi_setPara()</code> call.
 *
 * @param dest A pointer to where the reordered text is to be copied.
 *             The source text and <code>dest[destSize]</code>
 *             must not overlap.
 *
 * @param destSize The size of the <code>dest</code> buffer,
 *                 in number of UChars.
 *                 If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
 *                 option is set, then the destination length could be
 *                 as large as
 *                 <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
 *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
 *                 is set, then the destination length may be less than
 *                 <code>ubidi_getLength(pBiDi)</code>.
 *                 If none of these options is set, then the destination length
 *                 will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
 *
 * @param options A bit set of options for the reordering that control
 *                how the reordered text is written.
 *                The options include mirroring the characters on a code
 *                point basis and inserting LRM characters, which is used
 *                especially for transforming visually stored text
 *                to logically stored text (although this is still an
 *                imperfect implementation of an "inverse Bidi" algorithm
 *                because it uses the "forward Bidi" algorithm at its core).
 *                The available options are:
 *                <code>#UBIDI_DO_MIRRORING</code>,
 *                <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
 *                <code>#UBIDI_KEEP_BASE_COMBINING</code>,
 *                <code>#UBIDI_OUTPUT_REVERSE</code>,
 *                <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The length of the output string.
 *
 * @see ubidi_getProcessedLength
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ubidi_writeReordered(UBiDi *pBiDi,
                     UChar *dest, int32_t destSize,
                     uint16_t options,
                     UErrorCode *pErrorCode);

/**
 * Reverse a Right-To-Left run of Unicode text.
 *
 * This function preserves the integrity of characters with multiple
 * code units and (optionally) combining characters.
 * Characters can be replaced by mirror-image characters
 * in the destination buffer. Note that "real" mirroring has
 * to be done in a rendering engine by glyph selection
 * and that for many "mirrored" characters there are no
 * Unicode characters as mirror-image equivalents.
 * There are also options to insert or remove Bidi control
 * characters.
 *
 * This function is the implementation for reversing RTL runs as part
 * of <code>ubidi_writeReordered()</code>. For detailed descriptions
 * of the parameters, see there.
 * Since no Bidi controls are inserted here, the output string length
 * will never exceed <code>srcLength</code>.
 *
 * @see ubidi_writeReordered
 *
 * @param src A pointer to the RTL run text.
 *
 * @param srcLength The length of the RTL run.
 *
 * @param dest A pointer to where the reordered text is to be copied.
 *             <code>src[srcLength]</code> and <code>dest[destSize]</code>
 *             must not overlap.
 *
 * @param destSize The size of the <code>dest</code> buffer,
 *                 in number of UChars.
 *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
 *                 is set, then the destination length may be less than
 *                 <code>srcLength</code>.
 *                 If this option is not set, then the destination length
 *                 will be exactly <code>srcLength</code>.
 *
 * @param options A bit set of options for the reordering that control
 *                how the reordered text is written.
 *                See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
 *
 * @param pErrorCode must be a valid pointer to an error code value.
 *
 * @return The length of the output string.
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ubidi_writeReverse(const UChar *src, int32_t srcLength,
                   UChar *dest, int32_t destSize,
                   uint16_t options,
                   UErrorCode *pErrorCode);

/*#define BIDI_SAMPLE_CODE*/
/*@}*/

#endif
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 // © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1997-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
*   Date        Name        Description
*   04/02/97    aliu        Creation.
*   03/29/99    helena      Updated for C APIs.
*   4/15/99     Madhu       Updated for C Implementation and Javadoc
*   5/20/99     Madhu       Added the function u_getVersion()
*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
*   8/27/1999   schererm    UCharDirection constants: U_...
*   11/11/1999  weiv        added u_isalnum(), cleaned comments
*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
******************************************************************************
*/

#ifndef UCHAR_H
#define UCHAR_H

#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
#include "unicode/ucpmap.h"

#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)

#define USET_DEFINED

/**
 * USet is the C API type corresponding to C++ class UnicodeSet.
 * It is forward-declared here to avoid including unicode/uset.h file if related
 * APIs are not used.
 *
 * @see ucnv_getUnicodeSet
 * @stable ICU 2.4
 */
typedef struct USet USet;

#endif


U_CDECL_BEGIN

/*==========================================================================*/
/* Unicode version number                                                   */
/*==========================================================================*/
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.dat
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * @see u_getUnicodeVersion
 * @stable ICU 2.0
 */
#define U_UNICODE_VERSION "15.0"

/**
 * \file
 * \brief C API: Unicode Properties
 *
 * This C API provides low-level access to the Unicode Character Database.
 * In addition to raw property values, some convenience functions calculate
 * derived properties, for example for Java-style programming.
 *
 * Unicode assigns each code point (not just assigned character) values for
 * many properties.
 * Most of them are simple boolean flags, or constants from a small enumerated list.
 * For some properties, values are strings or other relatively more complex types.
 *
 * For more information see
 * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
 * and the ICU User Guide chapter on Properties (https://unicode-org.github.io/icu/userguide/strings/properties).
 *
 * Many properties are accessible via generic functions that take a UProperty selector.
 * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
 * - u_getIntPropertyValue() returns an integer value per property and code point.
 *   For each supported enumerated or catalog property, there is
 *   an enum type for all of the property's values, and
 *   u_getIntPropertyValue() returns the numeric values of those constants.
 * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
 *   all code points for which the property is true.
 * - u_getIntPropertyMap() returns a map for each
 *   ICU-supported enumerated/catalog/int-valued property which
 *   maps all Unicode code points to their values for that property.
 *
 * Many functions are designed to match java.lang.Character functions.
 * See the individual function documentation,
 * and see the JDK 1.4 java.lang.Character documentation
 * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
 *
 * There are also functions that provide easy migration from C/POSIX functions
 * like isblank(). Their use is generally discouraged because the C/POSIX
 * standards do not define their semantics beyond the ASCII range, which means
 * that different implementations exhibit very different behavior.
 * Instead, Unicode properties should be used directly.
 *
 * There are also only a few, broad C/POSIX character classes, and they tend
 * to be used for conflicting purposes. For example, the "isalpha()" class
 * is sometimes used to determine word boundaries, while a more sophisticated
 * approach would at least distinguish initial letters from continuation
 * characters (the latter including combining marks).
 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
 * Another example: There is no "istitle()" class for titlecase characters.
 *
 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
 * ICU implements them according to the Standard Recommendations in
 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
 *
 * API access for C/POSIX character classes is as follows:
 * - alpha:     u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
 * - lower:     u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
 * - upper:     u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
 * - punct:     u_ispunct(c)
 * - digit:     u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
 * - xdigit:    u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
 * - alnum:     u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
 * - space:     u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
 * - blank:     u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
 * - cntrl:     u_charType(c)==U_CONTROL_CHAR
 * - graph:     u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
 * - print:     u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
 *
 * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
 * the Standard Recommendations in UTS #18. Instead, they match Java
 * functions according to their API documentation.
 *
 * \htmlonly
 * The C/POSIX character classes are also available in UnicodeSet patterns,
 * using patterns like [:graph:] or \p{graph}.
 * \endhtmlonly
 *
 * Note: There are several ICU whitespace functions.
 * Comparison:
 * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
 *       most of general categories "Z" (separators) + most whitespace ISO controls
 *       (including no-break spaces, but excluding IS1..IS4)
 * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
 * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
 * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
 * - u_isblank: "horizontal spaces" = TAB + Zs
 */

/**
 * Constants.
 */

/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
#define UCHAR_MIN_VALUE 0

/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * @stable ICU 2.0
 */
#define UCHAR_MAX_VALUE 0x10ffff

/**
 * Get a single-bit bit set (a flag) from a bit number 0..31.
 * @stable ICU 2.1
 */
#define U_MASK(x) ((uint32_t)1<<(x))

/**
 * Selection constants for Unicode properties.
 * These constants are used in functions like u_hasBinaryProperty to select
 * one of the Unicode properties.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 *
 * For details about the properties see
 * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
 *
 * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 * Check u_getUnicodeVersion to be sure.
 *
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
typedef enum UProperty {
    /*
     * Note: UProperty constants are parsed by preparseucd.py.
     * It matches lines like
     *     UCHAR_<Unicode property name>=<integer>,
     */

    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
    debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
    rather than UCHAR_BINARY_START.  Likewise for other *_START
    identifiers. */

    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
    UCHAR_ALPHABETIC=0,
    /** First constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
    UCHAR_ASCII_HEX_DIGIT=1,
    /** Binary property Bidi_Control.
        Format controls which have specific functions
        in the Bidi Algorithm. @stable ICU 2.1 */
    UCHAR_BIDI_CONTROL=2,
    /** Binary property Bidi_Mirrored.
        Characters that may change display in RTL text.
        Same as u_isMirrored.
        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
    UCHAR_BIDI_MIRRORED=3,
    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
    UCHAR_DASH=4,
    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
        Ignorable in most processing.
        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
    UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
    /** Binary property Deprecated (new in Unicode 3.2).
        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
    UCHAR_DEPRECATED=6,
    /** Binary property Diacritic. Characters that linguistically modify
        the meaning of another character to which they apply. @stable ICU 2.1 */
    UCHAR_DIACRITIC=7,
    /** Binary property Extender.
        Extend the value or shape of a preceding alphabetic character,
        e.g., length and iteration marks. @stable ICU 2.1 */
    UCHAR_EXTENDER=8,
    /** Binary property Full_Composition_Exclusion.
        CompositionExclusions.txt+Singleton Decompositions+
        Non-Starter Decompositions. @stable ICU 2.1 */
    UCHAR_FULL_COMPOSITION_EXCLUSION=9,
    /** Binary property Grapheme_Base (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_BASE=10,
    /** Binary property Grapheme_Extend (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_EXTEND=11,
    /** Binary property Grapheme_Link (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
    UCHAR_GRAPHEME_LINK=12,
    /** Binary property Hex_Digit.
        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
    UCHAR_HEX_DIGIT=13,
    /** Binary property Hyphen. Dashes used to mark connections
        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
    UCHAR_HYPHEN=14,
    /** Binary property ID_Continue.
        Characters that can continue an identifier.
        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
    UCHAR_ID_CONTINUE=15,
    /** Binary property ID_Start.
        Characters that can start an identifier.
        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
    UCHAR_ID_START=16,
    /** Binary property Ideographic.
        CJKV ideographs. @stable ICU 2.1 */
    UCHAR_IDEOGRAPHIC=17,
    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_BINARY_OPERATOR=18,
    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_TRINARY_OPERATOR=19,
    /** Binary property Join_Control.
        Format controls for cursive joining and ligation. @stable ICU 2.1 */
    UCHAR_JOIN_CONTROL=20,
    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
        Characters that do not use logical order and
        require special handling in most processing. @stable ICU 2.1 */
    UCHAR_LOGICAL_ORDER_EXCEPTION=21,
    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
        Ll+Other_Lowercase @stable ICU 2.1 */
    UCHAR_LOWERCASE=22,
    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
    UCHAR_MATH=23,
    /** Binary property Noncharacter_Code_Point.
        Code points that are explicitly defined as illegal
        for the encoding of characters. @stable ICU 2.1 */
    UCHAR_NONCHARACTER_CODE_POINT=24,
    /** Binary property Quotation_Mark. @stable ICU 2.1 */
    UCHAR_QUOTATION_MARK=25,
    /** Binary property Radical (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_RADICAL=26,
    /** Binary property Soft_Dotted (new in Unicode 3.2).
        Characters with a "soft dot", like i or j.
        An accent placed on these characters causes
        the dot to disappear. @stable ICU 2.1 */
    UCHAR_SOFT_DOTTED=27,
    /** Binary property Terminal_Punctuation.
        Punctuation characters that generally mark
        the end of textual units. @stable ICU 2.1 */
    UCHAR_TERMINAL_PUNCTUATION=28,
    /** Binary property Unified_Ideograph (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_UNIFIED_IDEOGRAPH=29,
    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
        Lu+Other_Uppercase @stable ICU 2.1 */
    UCHAR_UPPERCASE=30,
    /** Binary property White_Space.
        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
    UCHAR_WHITE_SPACE=31,
    /** Binary property XID_Continue.
        ID_Continue modified to allow closure under
        normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_CONTINUE=32,
    /** Binary property XID_Start. ID_Start modified to allow
        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_START=33,
    /** Binary property Case_Sensitive. Either the source of a case
        mapping or _in_ the target of a case mapping. Not the same as
        the general category Cased_Letter. @stable ICU 2.6 */
   UCHAR_CASE_SENSITIVE=34,
    /** Binary property STerm (new in Unicode 4.0.1).
        Sentence Terminal. Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        @stable ICU 3.0 */
    UCHAR_S_TERM=35,
    /** Binary property Variation_Selector (new in Unicode 4.0.1).
        Indicates all those characters that qualify as Variation Selectors.
        For details on the behavior of these characters,
        see StandardizedVariants.html and 15.6 Variation Selectors.
        @stable ICU 3.0 */
    UCHAR_VARIATION_SELECTOR=36,
    /** Binary property NFD_Inert.
        ICU-specific property for characters that are inert under NFD,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFD_INERT=37,
    /** Binary property NFKD_Inert.
        ICU-specific property for characters that are inert under NFKD,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFKD_INERT=38,
    /** Binary property NFC_Inert.
        ICU-specific property for characters that are inert under NFC,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFC_INERT=39,
    /** Binary property NFKC_Inert.
        ICU-specific property for characters that are inert under NFKC,
        i.e., they do not interact with adjacent characters.
        See the documentation for the Normalizer2 class and the
        Normalizer2::isInert() method.
        @stable ICU 3.0 */
    UCHAR_NFKC_INERT=40,
    /** Binary Property Segment_Starter.
        ICU-specific property for characters that are starters in terms of
        Unicode normalization and combining character sequences.
        They have ccc=0 and do not occur in non-initial position of the
        canonical decomposition of any character
        (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
        ICU uses this property for segmenting a string for generating a set of
        canonically equivalent strings, e.g. for canonical closure while
        processing collation tailoring rules.
        @stable ICU 3.0 */
    UCHAR_SEGMENT_STARTER=41,
    /** Binary property Pattern_Syntax (new in Unicode 4.1).
        See UAX #31 Identifier and Pattern Syntax
        (http://www.unicode.org/reports/tr31/)
        @stable ICU 3.4 */
    UCHAR_PATTERN_SYNTAX=42,
    /** Binary property Pattern_White_Space (new in Unicode 4.1).
        See UAX #31 Identifier and Pattern Syntax
        (http://www.unicode.org/reports/tr31/)
        @stable ICU 3.4 */
    UCHAR_PATTERN_WHITE_SPACE=43,
    /** Binary property alnum (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_ALNUM=44,
    /** Binary property blank (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_BLANK=45,
    /** Binary property graph (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_GRAPH=46,
    /** Binary property print (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_PRINT=47,
    /** Binary property xdigit (a C/POSIX character class).
        Implemented according to the UTS #18 Annex C Standard Recommendation.
        See the uchar.h file documentation.
        @stable ICU 3.4 */
    UCHAR_POSIX_XDIGIT=48,
    /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
    UCHAR_CASED=49,
    /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
    UCHAR_CASE_IGNORABLE=50,
    /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_LOWERCASED=51,
    /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_UPPERCASED=52,
    /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_TITLECASED=53,
    /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_CASEFOLDED=54,
    /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_CASEMAPPED=55,
    /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
    UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
    /**
     * Binary property Emoji.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    UCHAR_EMOJI=57,
    /**
     * Binary property Emoji_Presentation.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    UCHAR_EMOJI_PRESENTATION=58,
    /**
     * Binary property Emoji_Modifier.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    UCHAR_EMOJI_MODIFIER=59,
    /**
     * Binary property Emoji_Modifier_Base.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    UCHAR_EMOJI_MODIFIER_BASE=60,
    /**
     * Binary property Emoji_Component.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 60
     */
    UCHAR_EMOJI_COMPONENT=61,
    /**
     * Binary property Regional_Indicator.
     * @stable ICU 60
     */
    UCHAR_REGIONAL_INDICATOR=62,
    /**
     * Binary property Prepended_Concatenation_Mark.
     * @stable ICU 60
     */
    UCHAR_PREPENDED_CONCATENATION_MARK=63,
    /**
     * Binary property Extended_Pictographic.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 62
     */
    UCHAR_EXTENDED_PICTOGRAPHIC=64,
    /**
     * Binary property of strings Basic_Emoji.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_BASIC_EMOJI=65,
    /**
     * Binary property of strings Emoji_Keycap_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_EMOJI_KEYCAP_SEQUENCE=66,
    /**
     * Binary property of strings RGI_Emoji_Modifier_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67,
    /**
     * Binary property of strings RGI_Emoji_Flag_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68,
    /**
     * Binary property of strings RGI_Emoji_Tag_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_RGI_EMOJI_TAG_SEQUENCE=69,
    /**
     * Binary property of strings RGI_Emoji_ZWJ_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70,
    /**
     * Binary property of strings RGI_Emoji.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    UCHAR_RGI_EMOJI=71,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for binary Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_BINARY_LIMIT=72,
#endif  // U_HIDE_DEPRECATED_API

    /** Enumerated property Bidi_Class.
        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
    UCHAR_BIDI_CLASS=0x1000,
    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
    UCHAR_INT_START=UCHAR_BIDI_CLASS,
    /** Enumerated property Block.
        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
    UCHAR_BLOCK=0x1001,
    /** Enumerated property Canonical_Combining_Class.
        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
    UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
    /** Enumerated property Decomposition_Type.
        Returns UDecompositionType values. @stable ICU 2.2 */
    UCHAR_DECOMPOSITION_TYPE=0x1003,
    /** Enumerated property East_Asian_Width.
        See http://www.unicode.org/reports/tr11/
        Returns UEastAsianWidth values. @stable ICU 2.2 */
    UCHAR_EAST_ASIAN_WIDTH=0x1004,
    /** Enumerated property General_Category.
        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
    UCHAR_GENERAL_CATEGORY=0x1005,
    /** Enumerated property Joining_Group.
        Returns UJoiningGroup values. @stable ICU 2.2 */
    UCHAR_JOINING_GROUP=0x1006,
    /** Enumerated property Joining_Type.
        Returns UJoiningType values. @stable ICU 2.2 */
    UCHAR_JOINING_TYPE=0x1007,
    /** Enumerated property Line_Break.
        Returns ULineBreak values. @stable ICU 2.2 */
    UCHAR_LINE_BREAK=0x1008,
    /** Enumerated property Numeric_Type.
        Returns UNumericType values. @stable ICU 2.2 */
    UCHAR_NUMERIC_TYPE=0x1009,
    /** Enumerated property Script.
        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
    UCHAR_SCRIPT=0x100A,
    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
        Returns UHangulSyllableType values. @stable ICU 2.6 */
    UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
    /** Enumerated property NFD_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFD_QUICK_CHECK=0x100C,
    /** Enumerated property NFKD_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFKD_QUICK_CHECK=0x100D,
    /** Enumerated property NFC_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFC_QUICK_CHECK=0x100E,
    /** Enumerated property NFKC_Quick_Check.
        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
    UCHAR_NFKC_QUICK_CHECK=0x100F,
    /** Enumerated property Lead_Canonical_Combining_Class.
        ICU-specific property for the ccc of the first code point
        of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
        Useful for checking for canonically ordered text;
        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
    UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
    /** Enumerated property Trail_Canonical_Combining_Class.
        ICU-specific property for the ccc of the last code point
        of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
        Useful for checking for canonically ordered text;
        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
    UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
    /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
        Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
    UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
    /** Enumerated property Sentence_Break (new in Unicode 4.1).
        Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        Returns USentenceBreak values. @stable ICU 3.4 */
    UCHAR_SENTENCE_BREAK=0x1013,
    /** Enumerated property Word_Break (new in Unicode 4.1).
        Used in UAX #29: Text Boundaries
        (http://www.unicode.org/reports/tr29/)
        Returns UWordBreakValues values. @stable ICU 3.4 */
    UCHAR_WORD_BREAK=0x1014,
    /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
        Used in UAX #9: Unicode Bidirectional Algorithm
        (http://www.unicode.org/reports/tr9/)
        Returns UBidiPairedBracketType values. @stable ICU 52 */
    UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
    /**
     * Enumerated property Indic_Positional_Category.
     * New in Unicode 6.0 as provisional property Indic_Matra_Category;
     * renamed and changed to informative in Unicode 8.0.
     * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
     * @stable ICU 63
     */
    UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
    /**
     * Enumerated property Indic_Syllabic_Category.
     * New in Unicode 6.0 as provisional; informative since Unicode 8.0.
     * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
     * @stable ICU 63
     */
    UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
    /**
     * Enumerated property Vertical_Orientation.
     * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
     * New as a UCD property in Unicode 10.0.
     * @stable ICU 63
     */
    UCHAR_VERTICAL_ORIENTATION=0x1018,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for enumerated/integer Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_INT_LIMIT=0x1019,
#endif  // U_HIDE_DEPRECATED_API

    /** Bitmask property General_Category_Mask.
        This is the General_Category property returned as a bit mask.
        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
        returns bit masks for UCharCategory values where exactly one bit is set.
        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
        a multi-bit mask is used for sets of categories like "Letters".
        Mask values should be cast to uint32_t.
        @stable ICU 2.4 */
    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
    /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for bit-mask Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_MASK_LIMIT=0x2001,
#endif  // U_HIDE_DEPRECATED_API

    /** Double property Numeric_Value.
        Corresponds to u_getNumericValue. @stable ICU 2.4 */
    UCHAR_NUMERIC_VALUE=0x3000,
    /** First constant for double Unicode properties. @stable ICU 2.4 */
    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for double Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_DOUBLE_LIMIT=0x3001,
#endif  // U_HIDE_DEPRECATED_API

    /** String property Age.
        Corresponds to u_charAge. @stable ICU 2.4 */
    UCHAR_AGE=0x4000,
    /** First constant for string Unicode properties. @stable ICU 2.4 */
    UCHAR_STRING_START=UCHAR_AGE,
    /** String property Bidi_Mirroring_Glyph.
        Corresponds to u_charMirror. @stable ICU 2.4 */
    UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
    /** String property Case_Folding.
        Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
    UCHAR_CASE_FOLDING=0x4002,
#ifndef U_HIDE_DEPRECATED_API
    /** Deprecated string property ISO_Comment.
        Corresponds to u_getISOComment. @deprecated ICU 49 */
    UCHAR_ISO_COMMENT=0x4003,
#endif  /* U_HIDE_DEPRECATED_API */
    /** String property Lowercase_Mapping.
        Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
    UCHAR_LOWERCASE_MAPPING=0x4004,
    /** String property Name.
        Corresponds to u_charName. @stable ICU 2.4 */
    UCHAR_NAME=0x4005,
    /** String property Simple_Case_Folding.
        Corresponds to u_foldCase. @stable ICU 2.4 */
    UCHAR_SIMPLE_CASE_FOLDING=0x4006,
    /** String property Simple_Lowercase_Mapping.
        Corresponds to u_tolower. @stable ICU 2.4 */
    UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
    /** String property Simple_Titlecase_Mapping.
        Corresponds to u_totitle. @stable ICU 2.4 */
    UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
    /** String property Simple_Uppercase_Mapping.
        Corresponds to u_toupper. @stable ICU 2.4 */
    UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
    /** String property Titlecase_Mapping.
        Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
    UCHAR_TITLECASE_MAPPING=0x400A,
#ifndef U_HIDE_DEPRECATED_API
    /** String property Unicode_1_Name.
        This property is of little practical value.
        Beginning with ICU 49, ICU APIs return an empty string for this property.
        Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
    UCHAR_UNICODE_1_NAME=0x400B,
#endif  /* U_HIDE_DEPRECATED_API */
    /** String property Uppercase_Mapping.
        Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
    UCHAR_UPPERCASE_MAPPING=0x400C,
    /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
        Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
    UCHAR_BIDI_PAIRED_BRACKET=0x400D,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for string Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_STRING_LIMIT=0x400E,
#endif  // U_HIDE_DEPRECATED_API

    /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
        Some characters are commonly used in multiple scripts.
        For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
        Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
        @stable ICU 4.6 */
    UCHAR_SCRIPT_EXTENSIONS=0x7000,
    /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
    UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the last constant for Unicode properties with unusual value types.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
#endif  // U_HIDE_DEPRECATED_API

    /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
    UCHAR_INVALID_CODE = -1
} UProperty;

/**
 * Data for enumerated Unicode general category types.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 * @stable ICU 2.0
 */
typedef enum UCharCategory
{
    /*
     * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
     * It matches pairs of lines like
     *     / ** <Unicode 2-letter General_Category value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */

    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
    U_UNASSIGNED              = 0,
    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
    U_GENERAL_OTHER_TYPES     = 0,
    /** Lu @stable ICU 2.0 */
    U_UPPERCASE_LETTER        = 1,
    /** Ll @stable ICU 2.0 */
    U_LOWERCASE_LETTER        = 2,
    /** Lt @stable ICU 2.0 */
    U_TITLECASE_LETTER        = 3,
    /** Lm @stable ICU 2.0 */
    U_MODIFIER_LETTER         = 4,
    /** Lo @stable ICU 2.0 */
    U_OTHER_LETTER            = 5,
    /** Mn @stable ICU 2.0 */
    U_NON_SPACING_MARK        = 6,
    /** Me @stable ICU 2.0 */
    U_ENCLOSING_MARK          = 7,
    /** Mc @stable ICU 2.0 */
    U_COMBINING_SPACING_MARK  = 8,
    /** Nd @stable ICU 2.0 */
    U_DECIMAL_DIGIT_NUMBER    = 9,
    /** Nl @stable ICU 2.0 */
    U_LETTER_NUMBER           = 10,
    /** No @stable ICU 2.0 */
    U_OTHER_NUMBER            = 11,
    /** Zs @stable ICU 2.0 */
    U_SPACE_SEPARATOR         = 12,
    /** Zl @stable ICU 2.0 */
    U_LINE_SEPARATOR          = 13,
    /** Zp @stable ICU 2.0 */
    U_PARAGRAPH_SEPARATOR     = 14,
    /** Cc @stable ICU 2.0 */
    U_CONTROL_CHAR            = 15,
    /** Cf @stable ICU 2.0 */
    U_FORMAT_CHAR             = 16,
    /** Co @stable ICU 2.0 */
    U_PRIVATE_USE_CHAR        = 17,
    /** Cs @stable ICU 2.0 */
    U_SURROGATE               = 18,
    /** Pd @stable ICU 2.0 */
    U_DASH_PUNCTUATION        = 19,
    /** Ps @stable ICU 2.0 */
    U_START_PUNCTUATION       = 20,
    /** Pe @stable ICU 2.0 */
    U_END_PUNCTUATION         = 21,
    /** Pc @stable ICU 2.0 */
    U_CONNECTOR_PUNCTUATION   = 22,
    /** Po @stable ICU 2.0 */
    U_OTHER_PUNCTUATION       = 23,
    /** Sm @stable ICU 2.0 */
    U_MATH_SYMBOL             = 24,
    /** Sc @stable ICU 2.0 */
    U_CURRENCY_SYMBOL         = 25,
    /** Sk @stable ICU 2.0 */
    U_MODIFIER_SYMBOL         = 26,
    /** So @stable ICU 2.0 */
    U_OTHER_SYMBOL            = 27,
    /** Pi @stable ICU 2.0 */
    U_INITIAL_PUNCTUATION     = 28,
    /** Pf @stable ICU 2.0 */
    U_FINAL_PUNCTUATION       = 29,
    /**
     * One higher than the last enum UCharCategory constant.
     * This numeric value is stable (will not change), see
     * http://www.unicode.org/policies/stability_policy.html#Property_Value
     *
     * @stable ICU 2.0
     */
    U_CHAR_CATEGORY_COUNT
} UCharCategory;

/**
 * U_GC_XX_MASK constants are bit flags corresponding to Unicode
 * general category values.
 * For each category, the nth bit is set if the numeric value of the
 * corresponding UCharCategory constant is n.
 *
 * There are also some U_GC_Y_MASK constants for groups of general categories
 * like L for all letter categories.
 *
 * @see u_charType
 * @see U_GET_GC_MASK
 * @see UCharCategory
 * @stable ICU 2.1
 */
#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CS_MASK    U_MASK(U_SURROGATE)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)


/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
#define U_GC_L_MASK \
            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)

/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
#define U_GC_LC_MASK \
            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)

/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)

/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)

/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)

/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
#define U_GC_C_MASK \
            (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)

/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
#define U_GC_P_MASK \
            (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
             U_GC_PI_MASK|U_GC_PF_MASK)

/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)

/**
 * This specifies the language directional property of a character set.
 * @stable ICU 2.0
 */
typedef enum UCharDirection {
    /*
     * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
     * It matches pairs of lines like
     *     / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */

    /** L @stable ICU 2.0 */
    U_LEFT_TO_RIGHT               = 0,
    /** R @stable ICU 2.0 */
    U_RIGHT_TO_LEFT               = 1,
    /** EN @stable ICU 2.0 */
    U_EUROPEAN_NUMBER             = 2,
    /** ES @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
    /** ET @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
    /** AN @stable ICU 2.0 */
    U_ARABIC_NUMBER               = 5,
    /** CS @stable ICU 2.0 */
    U_COMMON_NUMBER_SEPARATOR     = 6,
    /** B @stable ICU 2.0 */
    U_BLOCK_SEPARATOR             = 7,
    /** S @stable ICU 2.0 */
    U_SEGMENT_SEPARATOR           = 8,
    /** WS @stable ICU 2.0 */
    U_WHITE_SPACE_NEUTRAL         = 9,
    /** ON @stable ICU 2.0 */
    U_OTHER_NEUTRAL               = 10,
    /** LRE @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
    /** LRO @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
    /** AL @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_ARABIC        = 13,
    /** RLE @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
    /** RLO @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
    /** PDF @stable ICU 2.0 */
    U_POP_DIRECTIONAL_FORMAT      = 16,
    /** NSM @stable ICU 2.0 */
    U_DIR_NON_SPACING_MARK        = 17,
    /** BN @stable ICU 2.0 */
    U_BOUNDARY_NEUTRAL            = 18,
    /** FSI @stable ICU 52 */
    U_FIRST_STRONG_ISOLATE        = 19,
    /** LRI @stable ICU 52 */
    U_LEFT_TO_RIGHT_ISOLATE       = 20,
    /** RLI @stable ICU 52 */
    U_RIGHT_TO_LEFT_ISOLATE       = 21,
    /** PDI @stable ICU 52 */
    U_POP_DIRECTIONAL_ISOLATE     = 22,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest UCharDirection value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_CHAR_DIRECTION_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UCharDirection;

/**
 * Bidi Paired Bracket Type constants.
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
typedef enum UBidiPairedBracketType {
    /*
     * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
     */

    /** Not a paired bracket. @stable ICU 52 */
    U_BPT_NONE,
    /** Open paired bracket. @stable ICU 52 */
    U_BPT_OPEN,
    /** Close paired bracket. @stable ICU 52 */
    U_BPT_CLOSE,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UBidiPairedBracketType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_BPT_COUNT /* 3 */
#endif  // U_HIDE_DEPRECATED_API
} UBidiPairedBracketType;

/**
 * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
 * @stable ICU 2.0
 */
enum UBlockCode {
    /*
     * Note: UBlockCode constants are parsed by preparseucd.py.
     * It matches lines like
     *     UBLOCK_<Unicode Block value name> = <integer>,
     */

    /** New No_Block value in Unicode 4. @stable ICU 2.6 */
    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */

    /** @stable ICU 2.0 */
    UBLOCK_BASIC_LATIN = 1, /*[0000]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/

    /** @stable ICU 2.0 */
    UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/

    /** @stable ICU 2.0 */
    UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/

    /** @stable ICU 2.0 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/

    /**
     * Unicode 3.2 renames this block to "Greek and Coptic".
     * @stable ICU 2.0
     */
    UBLOCK_GREEK =8, /*[0370]*/

    /** @stable ICU 2.0 */
    UBLOCK_CYRILLIC =9, /*[0400]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARMENIAN =10, /*[0530]*/

    /** @stable ICU 2.0 */
    UBLOCK_HEBREW =11, /*[0590]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARABIC =12, /*[0600]*/

    /** @stable ICU 2.0 */
    UBLOCK_SYRIAC =13, /*[0700]*/

    /** @stable ICU 2.0 */
    UBLOCK_THAANA =14, /*[0780]*/

    /** @stable ICU 2.0 */
    UBLOCK_DEVANAGARI =15, /*[0900]*/

    /** @stable ICU 2.0 */
    UBLOCK_BENGALI =16, /*[0980]*/

    /** @stable ICU 2.0 */
    UBLOCK_GURMUKHI =17, /*[0A00]*/

    /** @stable ICU 2.0 */
    UBLOCK_GUJARATI =18, /*[0A80]*/

    /** @stable ICU 2.0 */
    UBLOCK_ORIYA =19, /*[0B00]*/

    /** @stable ICU 2.0 */
    UBLOCK_TAMIL =20, /*[0B80]*/

    /** @stable ICU 2.0 */
    UBLOCK_TELUGU =21, /*[0C00]*/

    /** @stable ICU 2.0 */
    UBLOCK_KANNADA =22, /*[0C80]*/

    /** @stable ICU 2.0 */
    UBLOCK_MALAYALAM =23, /*[0D00]*/

    /** @stable ICU 2.0 */
    UBLOCK_SINHALA =24, /*[0D80]*/

    /** @stable ICU 2.0 */
    UBLOCK_THAI =25, /*[0E00]*/

    /** @stable ICU 2.0 */
    UBLOCK_LAO =26, /*[0E80]*/

    /** @stable ICU 2.0 */
    UBLOCK_TIBETAN =27, /*[0F00]*/

    /** @stable ICU 2.0 */
    UBLOCK_MYANMAR =28, /*[1000]*/

    /** @stable ICU 2.0 */
    UBLOCK_GEORGIAN =29, /*[10A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_HANGUL_JAMO =30, /*[1100]*/

    /** @stable ICU 2.0 */
    UBLOCK_ETHIOPIC =31, /*[1200]*/

    /** @stable ICU 2.0 */
    UBLOCK_CHEROKEE =32, /*[13A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/

    /** @stable ICU 2.0 */
    UBLOCK_OGHAM =34, /*[1680]*/

    /** @stable ICU 2.0 */
    UBLOCK_RUNIC =35, /*[16A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_KHMER =36, /*[1780]*/

    /** @stable ICU 2.0 */
    UBLOCK_MONGOLIAN =37, /*[1800]*/

    /** @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/

    /** @stable ICU 2.0 */
    UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/

    /** @stable ICU 2.0 */
    UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/

    /** @stable ICU 2.0 */
    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/

    /** @stable ICU 2.0 */
    UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/

    /**
     * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
     * @stable ICU 2.0
     */
    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/

    /** @stable ICU 2.0 */
    UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/

    /** @stable ICU 2.0 */
    UBLOCK_NUMBER_FORMS =45, /*[2150]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARROWS =46, /*[2190]*/

    /** @stable ICU 2.0 */
    UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/

    /** @stable ICU 2.0 */
    UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/

    /** @stable ICU 2.0 */
    UBLOCK_CONTROL_PICTURES =49, /*[2400]*/

    /** @stable ICU 2.0 */
    UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/

    /** @stable ICU 2.0 */
    UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/

    /** @stable ICU 2.0 */
    UBLOCK_BOX_DRAWING =52, /*[2500]*/

    /** @stable ICU 2.0 */
    UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/

    /** @stable ICU 2.0 */
    UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/

    /** @stable ICU 2.0 */
    UBLOCK_DINGBATS =56, /*[2700]*/

    /** @stable ICU 2.0 */
    UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/

    /** @stable ICU 2.0 */
    UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/

    /** @stable ICU 2.0 */
    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/

    /** @stable ICU 2.0 */
    UBLOCK_HIRAGANA =62, /*[3040]*/

    /** @stable ICU 2.0 */
    UBLOCK_KATAKANA =63, /*[30A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_BOPOMOFO =64, /*[3100]*/

    /** @stable ICU 2.0 */
    UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/

    /** @stable ICU 2.0 */
    UBLOCK_KANBUN =66, /*[3190]*/

    /** @stable ICU 2.0 */
    UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/

    /** @stable ICU 2.0 */
    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/

    /** @stable ICU 2.0 */
    UBLOCK_YI_SYLLABLES =72, /*[A000]*/

    /** @stable ICU 2.0 */
    UBLOCK_YI_RADICALS =73, /*[A490]*/

    /** @stable ICU 2.0 */
    UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/

    /** @stable ICU 2.0 */
    UBLOCK_HIGH_SURROGATES =75, /*[D800]*/

    /** @stable ICU 2.0 */
    UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/

    /** @stable ICU 2.0 */
    UBLOCK_LOW_SURROGATES =77, /*[DC00]*/

    /**
     * Same as UBLOCK_PRIVATE_USE.
     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
     * and multiple code point ranges had this block.
     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
     * adds separate blocks for the supplementary PUAs.
     *
     * @stable ICU 2.0
     */
    UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
    /**
     * Same as UBLOCK_PRIVATE_USE_AREA.
     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
     * and multiple code point ranges had this block.
     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
     * adds separate blocks for the supplementary PUAs.
     *
     * @stable ICU 2.0
     */
    UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,

    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/

    /** @stable ICU 2.0 */
    UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/

    /** @stable ICU 2.0 */
    UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/

    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/

    /** @stable ICU 2.0 */
    UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/

    /** @stable ICU 2.0 */
    UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/

    /** @stable ICU 2.0 */
    UBLOCK_SPECIALS =86, /*[FFF0]*/

    /** @stable ICU 2.0 */
    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/

    /* New blocks in Unicode 3.1 */

    /** @stable ICU 2.0 */
    UBLOCK_OLD_ITALIC = 88, /*[10300]*/
    /** @stable ICU 2.0 */
    UBLOCK_GOTHIC = 89, /*[10330]*/
    /** @stable ICU 2.0 */
    UBLOCK_DESERET = 90, /*[10400]*/
    /** @stable ICU 2.0 */
    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
    /** @stable ICU 2.0 */
    UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
    /** @stable ICU 2.0 */
    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
    /** @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94, /*[20000]*/
    /** @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
    /** @stable ICU 2.0 */
    UBLOCK_TAGS = 96, /*[E0000]*/

    /* New blocks in Unicode 3.2 */

    /** @stable ICU 3.0  */
    UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
    /**
     * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
     * @stable ICU 2.2
     */
    UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
    /** @stable ICU 2.2 */
    UBLOCK_TAGALOG = 98, /*[1700]*/
    /** @stable ICU 2.2 */
    UBLOCK_HANUNOO = 99, /*[1720]*/
    /** @stable ICU 2.2 */
    UBLOCK_BUHID = 100, /*[1740]*/
    /** @stable ICU 2.2 */
    UBLOCK_TAGBANWA = 101, /*[1760]*/
    /** @stable ICU 2.2 */
    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
    /** @stable ICU 2.2 */
    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
    /** @stable ICU 2.2 */
    UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
    /** @stable ICU 2.2 */
    UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
    /** @stable ICU 2.2 */
    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/

    /* New blocks in Unicode 4 */

    /** @stable ICU 2.6 */
    UBLOCK_LIMBU = 111, /*[1900]*/
    /** @stable ICU 2.6 */
    UBLOCK_TAI_LE = 112, /*[1950]*/
    /** @stable ICU 2.6 */
    UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
    /** @stable ICU 2.6 */
    UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
    /** @stable ICU 2.6 */
    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
    /** @stable ICU 2.6 */
    UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
    /** @stable ICU 2.6 */
    UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
    /** @stable ICU 2.6 */
    UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
    /** @stable ICU 2.6 */
    UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
    /** @stable ICU 2.6 */
    UBLOCK_UGARITIC = 120, /*[10380]*/
    /** @stable ICU 2.6 */
    UBLOCK_SHAVIAN = 121, /*[10450]*/
    /** @stable ICU 2.6 */
    UBLOCK_OSMANYA = 122, /*[10480]*/
    /** @stable ICU 2.6 */
    UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
    /** @stable ICU 2.6 */
    UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
    /** @stable ICU 2.6 */
    UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/

    /* New blocks in Unicode 4.1 */

    /** @stable ICU 3.4 */
    UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
    /** @stable ICU 3.4 */
    UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
    /** @stable ICU 3.4 */
    UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
    /** @stable ICU 3.4 */
    UBLOCK_BUGINESE = 129, /*[1A00]*/
    /** @stable ICU 3.4 */
    UBLOCK_CJK_STROKES = 130, /*[31C0]*/
    /** @stable ICU 3.4 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
    /** @stable ICU 3.4 */
    UBLOCK_COPTIC = 132, /*[2C80]*/
    /** @stable ICU 3.4 */
    UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
    /** @stable ICU 3.4 */
    UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
    /** @stable ICU 3.4 */
    UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
    /** @stable ICU 3.4 */
    UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
    /** @stable ICU 3.4 */
    UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
    /** @stable ICU 3.4 */
    UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
    /** @stable ICU 3.4 */
    UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
    /** @stable ICU 3.4 */
    UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
    /** @stable ICU 3.4 */
    UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
    /** @stable ICU 3.4 */
    UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
    /** @stable ICU 3.4 */
    UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
    /** @stable ICU 3.4 */
    UBLOCK_TIFINAGH = 144, /*[2D30]*/
    /** @stable ICU 3.4 */
    UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/

    /* New blocks in Unicode 5.0 */

    /** @stable ICU 3.6 */
    UBLOCK_NKO = 146, /*[07C0]*/
    /** @stable ICU 3.6 */
    UBLOCK_BALINESE = 147, /*[1B00]*/
    /** @stable ICU 3.6 */
    UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
    /** @stable ICU 3.6 */
    UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
    /** @stable ICU 3.6 */
    UBLOCK_PHAGS_PA = 150, /*[A840]*/
    /** @stable ICU 3.6 */
    UBLOCK_PHOENICIAN = 151, /*[10900]*/
    /** @stable ICU 3.6 */
    UBLOCK_CUNEIFORM = 152, /*[12000]*/
    /** @stable ICU 3.6 */
    UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
    /** @stable ICU 3.6 */
    UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/

    /* New blocks in Unicode 5.1 */

    /** @stable ICU 4.0 */
    UBLOCK_SUNDANESE = 155, /*[1B80]*/
    /** @stable ICU 4.0 */
    UBLOCK_LEPCHA = 156, /*[1C00]*/
    /** @stable ICU 4.0 */
    UBLOCK_OL_CHIKI = 157, /*[1C50]*/
    /** @stable ICU 4.0 */
    UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
    /** @stable ICU 4.0 */
    UBLOCK_VAI = 159, /*[A500]*/
    /** @stable ICU 4.0 */
    UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
    /** @stable ICU 4.0 */
    UBLOCK_SAURASHTRA = 161, /*[A880]*/
    /** @stable ICU 4.0 */
    UBLOCK_KAYAH_LI = 162, /*[A900]*/
    /** @stable ICU 4.0 */
    UBLOCK_REJANG = 163, /*[A930]*/
    /** @stable ICU 4.0 */
    UBLOCK_CHAM = 164, /*[AA00]*/
    /** @stable ICU 4.0 */
    UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
    /** @stable ICU 4.0 */
    UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
    /** @stable ICU 4.0 */
    UBLOCK_LYCIAN = 167, /*[10280]*/
    /** @stable ICU 4.0 */
    UBLOCK_CARIAN = 168, /*[102A0]*/
    /** @stable ICU 4.0 */
    UBLOCK_LYDIAN = 169, /*[10920]*/
    /** @stable ICU 4.0 */
    UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
    /** @stable ICU 4.0 */
    UBLOCK_DOMINO_TILES = 171, /*[1F030]*/

    /* New blocks in Unicode 5.2 */

    /** @stable ICU 4.4 */
    UBLOCK_SAMARITAN = 172, /*[0800]*/
    /** @stable ICU 4.4 */
    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
    /** @stable ICU 4.4 */
    UBLOCK_TAI_THAM = 174, /*[1A20]*/
    /** @stable ICU 4.4 */
    UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
    /** @stable ICU 4.4 */
    UBLOCK_LISU = 176, /*[A4D0]*/
    /** @stable ICU 4.4 */
    UBLOCK_BAMUM = 177, /*[A6A0]*/
    /** @stable ICU 4.4 */
    UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
    /** @stable ICU 4.4 */
    UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
    /** @stable ICU 4.4 */
    UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
    /** @stable ICU 4.4 */
    UBLOCK_JAVANESE = 181, /*[A980]*/
    /** @stable ICU 4.4 */
    UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
    /** @stable ICU 4.4 */
    UBLOCK_TAI_VIET = 183, /*[AA80]*/
    /** @stable ICU 4.4 */
    UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
    /** @stable ICU 4.4 */
    UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
    /** @stable ICU 4.4 */
    UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
    /** @stable ICU 4.4 */
    UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
    /** @stable ICU 4.4 */
    UBLOCK_AVESTAN = 188, /*[10B00]*/
    /** @stable ICU 4.4 */
    UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
    /** @stable ICU 4.4 */
    UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
    /** @stable ICU 4.4 */
    UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
    /** @stable ICU 4.4 */
    UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
    /** @stable ICU 4.4 */
    UBLOCK_KAITHI = 193, /*[11080]*/
    /** @stable ICU 4.4 */
    UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
    /** @stable ICU 4.4 */
    UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
    /** @stable ICU 4.4 */
    UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
    /** @stable ICU 4.4 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/

    /* New blocks in Unicode 6.0 */

    /** @stable ICU 4.6 */
    UBLOCK_MANDAIC = 198, /*[0840]*/
    /** @stable ICU 4.6 */
    UBLOCK_BATAK = 199, /*[1BC0]*/
    /** @stable ICU 4.6 */
    UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
    /** @stable ICU 4.6 */
    UBLOCK_BRAHMI = 201, /*[11000]*/
    /** @stable ICU 4.6 */
    UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
    /** @stable ICU 4.6 */
    UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
    /** @stable ICU 4.6 */
    UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
    /** @stable ICU 4.6 */
    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
    /** @stable ICU 4.6 */
    UBLOCK_EMOTICONS = 206, /*[1F600]*/
    /** @stable ICU 4.6 */
    UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
    /** @stable ICU 4.6 */
    UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
    /** @stable ICU 4.6 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/

    /* New blocks in Unicode 6.1 */

    /** @stable ICU 49 */
    UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
    /** @stable ICU 49 */
    UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
    /** @stable ICU 49 */
    UBLOCK_CHAKMA = 212, /*[11100]*/
    /** @stable ICU 49 */
    UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
    /** @stable ICU 49 */
    UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
    /** @stable ICU 49 */
    UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
    /** @stable ICU 49 */
    UBLOCK_MIAO = 216, /*[16F00]*/
    /** @stable ICU 49 */
    UBLOCK_SHARADA = 217, /*[11180]*/
    /** @stable ICU 49 */
    UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
    /** @stable ICU 49 */
    UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
    /** @stable ICU 49 */
    UBLOCK_TAKRI = 220, /*[11680]*/

    /* New blocks in Unicode 7.0 */

    /** @stable ICU 54 */
    UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
    /** @stable ICU 54 */
    UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
    /** @stable ICU 54 */
    UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
    /** @stable ICU 54 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
    /** @stable ICU 54 */
    UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
    /** @stable ICU 54 */
    UBLOCK_ELBASAN = 226, /*[10500]*/
    /** @stable ICU 54 */
    UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
    /** @stable ICU 54 */
    UBLOCK_GRANTHA = 228, /*[11300]*/
    /** @stable ICU 54 */
    UBLOCK_KHOJKI = 229, /*[11200]*/
    /** @stable ICU 54 */
    UBLOCK_KHUDAWADI = 230, /*[112B0]*/
    /** @stable ICU 54 */
    UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
    /** @stable ICU 54 */
    UBLOCK_LINEAR_A = 232, /*[10600]*/
    /** @stable ICU 54 */
    UBLOCK_MAHAJANI = 233, /*[11150]*/
    /** @stable ICU 54 */
    UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
    /** @stable ICU 54 */
    UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
    /** @stable ICU 54 */
    UBLOCK_MODI = 236, /*[11600]*/
    /** @stable ICU 54 */
    UBLOCK_MRO = 237, /*[16A40]*/
    /** @stable ICU 54 */
    UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
    /** @stable ICU 54 */
    UBLOCK_NABATAEAN = 239, /*[10880]*/
    /** @stable ICU 54 */
    UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
    /** @stable ICU 54 */
    UBLOCK_OLD_PERMIC = 241, /*[10350]*/
    /** @stable ICU 54 */
    UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
    /** @stable ICU 54 */
    UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
    /** @stable ICU 54 */
    UBLOCK_PALMYRENE = 244, /*[10860]*/
    /** @stable ICU 54 */
    UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
    /** @stable ICU 54 */
    UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
    /** @stable ICU 54 */
    UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
    /** @stable ICU 54 */
    UBLOCK_SIDDHAM = 248, /*[11580]*/
    /** @stable ICU 54 */
    UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
    /** @stable ICU 54 */
    UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
    /** @stable ICU 54 */
    UBLOCK_TIRHUTA = 251, /*[11480]*/
    /** @stable ICU 54 */
    UBLOCK_WARANG_CITI = 252, /*[118A0]*/

    /* New blocks in Unicode 8.0 */

    /** @stable ICU 56 */
    UBLOCK_AHOM = 253, /*[11700]*/
    /** @stable ICU 56 */
    UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
    /** @stable ICU 56 */
    UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
    /** @stable ICU 56 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/
    /** @stable ICU 56 */
    UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/
    /** @stable ICU 56 */
    UBLOCK_HATRAN = 258, /*[108E0]*/
    /** @stable ICU 56 */
    UBLOCK_MULTANI = 259, /*[11280]*/
    /** @stable ICU 56 */
    UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
    /** @stable ICU 56 */
    UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/
    /** @stable ICU 56 */
    UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/

    /* New blocks in Unicode 9.0 */

    /** @stable ICU 58 */
    UBLOCK_ADLAM = 263, /*[1E900]*/
    /** @stable ICU 58 */
    UBLOCK_BHAIKSUKI = 264, /*[11C00]*/
    /** @stable ICU 58 */
    UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/
    /** @stable ICU 58 */
    UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/
    /** @stable ICU 58 */
    UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/
    /** @stable ICU 58 */
    UBLOCK_MARCHEN = 268, /*[11C70]*/
    /** @stable ICU 58 */
    UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/
    /** @stable ICU 58 */
    UBLOCK_NEWA = 270, /*[11400]*/
    /** @stable ICU 58 */
    UBLOCK_OSAGE = 271, /*[104B0]*/
    /** @stable ICU 58 */
    UBLOCK_TANGUT = 272, /*[17000]*/
    /** @stable ICU 58 */
    UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/

    // New blocks in Unicode 10.0

    /** @stable ICU 60 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
    /** @stable ICU 60 */
    UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
    /** @stable ICU 60 */
    UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
    /** @stable ICU 60 */
    UBLOCK_NUSHU = 277, /*[1B170]*/
    /** @stable ICU 60 */
    UBLOCK_SOYOMBO = 278, /*[11A50]*/
    /** @stable ICU 60 */
    UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
    /** @stable ICU 60 */
    UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/

    // New blocks in Unicode 11.0

    /** @stable ICU 62 */
    UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/
    /** @stable ICU 62 */
    UBLOCK_DOGRA = 282, /*[11800]*/
    /** @stable ICU 62 */
    UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/
    /** @stable ICU 62 */
    UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/
    /** @stable ICU 62 */
    UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/
    /** @stable ICU 62 */
    UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/
    /** @stable ICU 62 */
    UBLOCK_MAKASAR = 287, /*[11EE0]*/
    /** @stable ICU 62 */
    UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/
    /** @stable ICU 62 */
    UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/
    /** @stable ICU 62 */
    UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/
    /** @stable ICU 62 */
    UBLOCK_SOGDIAN = 291, /*[10F30]*/

    // New blocks in Unicode 12.0

    /** @stable ICU 64 */
    UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, /*[13430]*/
    /** @stable ICU 64 */
    UBLOCK_ELYMAIC = 293, /*[10FE0]*/
    /** @stable ICU 64 */
    UBLOCK_NANDINAGARI = 294, /*[119A0]*/
    /** @stable ICU 64 */
    UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, /*[1E100]*/
    /** @stable ICU 64 */
    UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/
    /** @stable ICU 64 */
    UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/
    /** @stable ICU 64 */
    UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, /*[1FA70]*/
    /** @stable ICU 64 */
    UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/
    /** @stable ICU 64 */
    UBLOCK_WANCHO = 300, /*[1E2C0]*/

    // New blocks in Unicode 13.0

    /** @stable ICU 66 */
    UBLOCK_CHORASMIAN = 301, /*[10FB0]*/
    /** @stable ICU 66 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302, /*[30000]*/
    /** @stable ICU 66 */
    UBLOCK_DIVES_AKURU = 303, /*[11900]*/
    /** @stable ICU 66 */
    UBLOCK_KHITAN_SMALL_SCRIPT = 304, /*[18B00]*/
    /** @stable ICU 66 */
    UBLOCK_LISU_SUPPLEMENT = 305, /*[11FB0]*/
    /** @stable ICU 66 */
    UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306, /*[1FB00]*/
    /** @stable ICU 66 */
    UBLOCK_TANGUT_SUPPLEMENT = 307, /*[18D00]*/
    /** @stable ICU 66 */
    UBLOCK_YEZIDI = 308, /*[10E80]*/

    // New blocks in Unicode 14.0

    /** @stable ICU 70 */
    UBLOCK_ARABIC_EXTENDED_B = 309, /*[0870]*/
    /** @stable ICU 70 */
    UBLOCK_CYPRO_MINOAN = 310, /*[12F90]*/
    /** @stable ICU 70 */
    UBLOCK_ETHIOPIC_EXTENDED_B = 311, /*[1E7E0]*/
    /** @stable ICU 70 */
    UBLOCK_KANA_EXTENDED_B = 312, /*[1AFF0]*/
    /** @stable ICU 70 */
    UBLOCK_LATIN_EXTENDED_F = 313, /*[10780]*/
    /** @stable ICU 70 */
    UBLOCK_LATIN_EXTENDED_G = 314, /*[1DF00]*/
    /** @stable ICU 70 */
    UBLOCK_OLD_UYGHUR = 315, /*[10F70]*/
    /** @stable ICU 70 */
    UBLOCK_TANGSA = 316, /*[16A70]*/
    /** @stable ICU 70 */
    UBLOCK_TOTO = 317, /*[1E290]*/
    /** @stable ICU 70 */
    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 318, /*[11AB0]*/
    /** @stable ICU 70 */
    UBLOCK_VITHKUQI = 319, /*[10570]*/
    /** @stable ICU 70 */
    UBLOCK_ZNAMENNY_MUSICAL_NOTATION = 320, /*[1CF00]*/

    // New blocks in Unicode 15.0

    /** @stable ICU 72 */
    UBLOCK_ARABIC_EXTENDED_C = 321, /*[10EC0]*/
    /** @stable ICU 72 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 322, /*[31350]*/
    /** @stable ICU 72 */
    UBLOCK_CYRILLIC_EXTENDED_D = 323, /*[1E030]*/
    /** @stable ICU 72 */
    UBLOCK_DEVANAGARI_EXTENDED_A = 324, /*[11B00]*/
    /** @stable ICU 72 */
    UBLOCK_KAKTOVIK_NUMERALS = 325, /*[1D2C0]*/
    /** @stable ICU 72 */
    UBLOCK_KAWI = 326, /*[11F00]*/
    /** @stable ICU 72 */
    UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UBlockCode value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UBLOCK_COUNT = 328,
#endif  // U_HIDE_DEPRECATED_API

    /** @stable ICU 2.0 */
    UBLOCK_INVALID_CODE=-1
};

/** @stable ICU 2.0 */
typedef enum UBlockCode UBlockCode;

/**
 * East Asian Width constants.
 *
 * @see UCHAR_EAST_ASIAN_WIDTH
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
typedef enum UEastAsianWidth {
    /*
     * Note: UEastAsianWidth constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_EA_<Unicode East_Asian_Width value name>
     */

    U_EA_NEUTRAL,   /*[N]*/
    U_EA_AMBIGUOUS, /*[A]*/
    U_EA_HALFWIDTH, /*[H]*/
    U_EA_FULLWIDTH, /*[F]*/
    U_EA_NARROW,    /*[Na]*/
    U_EA_WIDE,      /*[W]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UEastAsianWidth value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_EA_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UEastAsianWidth;

/**
 * Selector constants for u_charName().
 * u_charName() returns the "modern" name of a
 * Unicode character; or the name that was defined in
 * Unicode version 1.0, before the Unicode standard merged
 * with ISO-10646; or an "extended" name that gives each
 * Unicode code point a unique name.
 *
 * @see u_charName
 * @stable ICU 2.0
 */
typedef enum UCharNameChoice {
    /** Unicode character name (Name property). @stable ICU 2.0 */
    U_UNICODE_CHAR_NAME,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * The Unicode_1_Name property value which is of little practical value.
     * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
     * @deprecated ICU 49
     */
    U_UNICODE_10_CHAR_NAME,
#endif  /* U_HIDE_DEPRECATED_API */
    /** Standard or synthetic character name. @stable ICU 2.0 */
    U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
    /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
    U_CHAR_NAME_ALIAS,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UCharNameChoice value.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_CHAR_NAME_CHOICE_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UCharNameChoice;

/**
 * Selector constants for u_getPropertyName() and
 * u_getPropertyValueName().  These selectors are used to choose which
 * name is returned for a given property or value.  All properties and
 * values have a long name.  Most have a short name, but some do not.
 * Unicode allows for additional names, beyond the long and short
 * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
 * i=1, 2,...
 *
 * @see u_getPropertyName()
 * @see u_getPropertyValueName()
 * @stable ICU 2.4
 */
typedef enum UPropertyNameChoice {
    U_SHORT_PROPERTY_NAME,
    U_LONG_PROPERTY_NAME,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UPropertyNameChoice value.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_PROPERTY_NAME_CHOICE_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UPropertyNameChoice;

/**
 * Decomposition Type constants.
 *
 * @see UCHAR_DECOMPOSITION_TYPE
 * @stable ICU 2.2
 */
typedef enum UDecompositionType {
    /*
     * Note: UDecompositionType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_DT_<Unicode Decomposition_Type value name>
     */

    U_DT_NONE,              /*[none]*/
    U_DT_CANONICAL,         /*[can]*/
    U_DT_COMPAT,            /*[com]*/
    U_DT_CIRCLE,            /*[enc]*/
    U_DT_FINAL,             /*[fin]*/
    U_DT_FONT,              /*[font]*/
    U_DT_FRACTION,          /*[fra]*/
    U_DT_INITIAL,           /*[init]*/
    U_DT_ISOLATED,          /*[iso]*/
    U_DT_MEDIAL,            /*[med]*/
    U_DT_NARROW,            /*[nar]*/
    U_DT_NOBREAK,           /*[nb]*/
    U_DT_SMALL,             /*[sml]*/
    U_DT_SQUARE,            /*[sqr]*/
    U_DT_SUB,               /*[sub]*/
    U_DT_SUPER,             /*[sup]*/
    U_DT_VERTICAL,          /*[vert]*/
    U_DT_WIDE,              /*[wide]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UDecompositionType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_DT_COUNT /* 18 */
#endif  // U_HIDE_DEPRECATED_API
} UDecompositionType;

/**
 * Joining Type constants.
 *
 * @see UCHAR_JOINING_TYPE
 * @stable ICU 2.2
 */
typedef enum UJoiningType {
    /*
     * Note: UJoiningType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_JT_<Unicode Joining_Type value name>
     */

    U_JT_NON_JOINING,       /*[U]*/
    U_JT_JOIN_CAUSING,      /*[C]*/
    U_JT_DUAL_JOINING,      /*[D]*/
    U_JT_LEFT_JOINING,      /*[L]*/
    U_JT_RIGHT_JOINING,     /*[R]*/
    U_JT_TRANSPARENT,       /*[T]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UJoiningType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_JT_COUNT /* 6 */
#endif  // U_HIDE_DEPRECATED_API
} UJoiningType;

/**
 * Joining Group constants.
 *
 * @see UCHAR_JOINING_GROUP
 * @stable ICU 2.2
 */
typedef enum UJoiningGroup {
    /*
     * Note: UJoiningGroup constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_JG_<Unicode Joining_Group value name>
     */

    U_JG_NO_JOINING_GROUP,
    U_JG_AIN,
    U_JG_ALAPH,
    U_JG_ALEF,
    U_JG_BEH,
    U_JG_BETH,
    U_JG_DAL,
    U_JG_DALATH_RISH,
    U_JG_E,
    U_JG_FEH,
    U_JG_FINAL_SEMKATH,
    U_JG_GAF,
    U_JG_GAMAL,
    U_JG_HAH,
    U_JG_TEH_MARBUTA_GOAL,  /**< @stable ICU 4.6 */
    U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
    U_JG_HE,
    U_JG_HEH,
    U_JG_HEH_GOAL,
    U_JG_HETH,
    U_JG_KAF,
    U_JG_KAPH,
    U_JG_KNOTTED_HEH,
    U_JG_LAM,
    U_JG_LAMADH,
    U_JG_MEEM,
    U_JG_MIM,
    U_JG_NOON,
    U_JG_NUN,
    U_JG_PE,
    U_JG_QAF,
    U_JG_QAPH,
    U_JG_REH,
    U_JG_REVERSED_PE,
    U_JG_SAD,
    U_JG_SADHE,
    U_JG_SEEN,
    U_JG_SEMKATH,
    U_JG_SHIN,
    U_JG_SWASH_KAF,
    U_JG_SYRIAC_WAW,
    U_JG_TAH,
    U_JG_TAW,
    U_JG_TEH_MARBUTA,
    U_JG_TETH,
    U_JG_WAW,
    U_JG_YEH,
    U_JG_YEH_BARREE,
    U_JG_YEH_WITH_TAIL,
    U_JG_YUDH,
    U_JG_YUDH_HE,
    U_JG_ZAIN,
    U_JG_FE,        /**< @stable ICU 2.6 */
    U_JG_KHAPH,     /**< @stable ICU 2.6 */
    U_JG_ZHAIN,     /**< @stable ICU 2.6 */
    U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
    U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
    U_JG_NYA,       /**< @stable ICU 4.4 */
    U_JG_ROHINGYA_YEH,  /**< @stable ICU 49 */
    U_JG_MANICHAEAN_ALEPH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_AYIN,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_BETH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_DALETH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_DHAMEDH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_FIVE,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_GIMEL,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_HETH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_HUNDRED,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_KAPH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_LAMEDH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_MEM,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_NUN,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_ONE,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_PE,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_QOPH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_RESH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_SADHE,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_SAMEKH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TAW,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TEN,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TETH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_THAMEDH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_TWENTY,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_WAW,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_YODH,  /**< @stable ICU 54 */
    U_JG_MANICHAEAN_ZAYIN,  /**< @stable ICU 54 */
    U_JG_STRAIGHT_WAW,  /**< @stable ICU 54 */
    U_JG_AFRICAN_FEH,  /**< @stable ICU 58 */
    U_JG_AFRICAN_NOON,  /**< @stable ICU 58 */
    U_JG_AFRICAN_QAF,  /**< @stable ICU 58 */

    U_JG_MALAYALAM_BHA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_JA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_LLA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_LLLA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_NGA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_NNA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_NNNA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_NYA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_RA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_SSA,  /**< @stable ICU 60 */
    U_JG_MALAYALAM_TTA,  /**< @stable ICU 60 */

    U_JG_HANIFI_ROHINGYA_KINNA_YA,  /**< @stable ICU 62 */
    U_JG_HANIFI_ROHINGYA_PA,  /**< @stable ICU 62 */

    U_JG_THIN_YEH,  /**< @stable ICU 70 */
    U_JG_VERTICAL_TAIL,  /**< @stable ICU 70 */

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UJoiningGroup value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_JG_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UJoiningGroup;

/**
 * Grapheme Cluster Break constants.
 *
 * @see UCHAR_GRAPHEME_CLUSTER_BREAK
 * @stable ICU 3.4
 */
typedef enum UGraphemeClusterBreak {
    /*
     * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_GCB_<Unicode Grapheme_Cluster_Break value name>
     */

    U_GCB_OTHER = 0,            /*[XX]*/
    U_GCB_CONTROL = 1,          /*[CN]*/
    U_GCB_CR = 2,               /*[CR]*/
    U_GCB_EXTEND = 3,           /*[EX]*/
    U_GCB_L = 4,                /*[L]*/
    U_GCB_LF = 5,               /*[LF]*/
    U_GCB_LV = 6,               /*[LV]*/
    U_GCB_LVT = 7,              /*[LVT]*/
    U_GCB_T = 8,                /*[T]*/
    U_GCB_V = 9,                /*[V]*/
    /** @stable ICU 4.0 */
    U_GCB_SPACING_MARK = 10,    /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    /** @stable ICU 4.0 */
    U_GCB_PREPEND = 11,         /*[PP]*/
    /** @stable ICU 50 */
    U_GCB_REGIONAL_INDICATOR = 12,  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** @stable ICU 58 */
    U_GCB_E_BASE = 13,          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** @stable ICU 58 */
    U_GCB_E_BASE_GAZ = 14,      /*[EBG]*/
    /** @stable ICU 58 */
    U_GCB_E_MODIFIER = 15,      /*[EM]*/
    /** @stable ICU 58 */
    U_GCB_GLUE_AFTER_ZWJ = 16,  /*[GAZ]*/
    /** @stable ICU 58 */
    U_GCB_ZWJ = 17,             /*[ZWJ]*/

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UGraphemeClusterBreak value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_GCB_COUNT = 18
#endif  // U_HIDE_DEPRECATED_API
} UGraphemeClusterBreak;

/**
 * Word Break constants.
 * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
 *
 * @see UCHAR_WORD_BREAK
 * @stable ICU 3.4
 */
typedef enum UWordBreakValues {
    /*
     * Note: UWordBreakValues constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_WB_<Unicode Word_Break value name>
     */

    U_WB_OTHER = 0,             /*[XX]*/
    U_WB_ALETTER = 1,           /*[LE]*/
    U_WB_FORMAT = 2,            /*[FO]*/
    U_WB_KATAKANA = 3,          /*[KA]*/
    U_WB_MIDLETTER = 4,         /*[ML]*/
    U_WB_MIDNUM = 5,            /*[MN]*/
    U_WB_NUMERIC = 6,           /*[NU]*/
    U_WB_EXTENDNUMLET = 7,      /*[EX]*/
    /** @stable ICU 4.0 */
    U_WB_CR = 8,                /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    /** @stable ICU 4.0 */
    U_WB_EXTEND = 9,            /*[Extend]*/
    /** @stable ICU 4.0 */
    U_WB_LF = 10,               /*[LF]*/
    /** @stable ICU 4.0 */
    U_WB_MIDNUMLET =11,         /*[MB]*/
    /** @stable ICU 4.0 */
    U_WB_NEWLINE =12,           /*[NL]*/
    /** @stable ICU 50 */
    U_WB_REGIONAL_INDICATOR = 13,   /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** @stable ICU 52 */
    U_WB_HEBREW_LETTER = 14,    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
    /** @stable ICU 52 */
    U_WB_SINGLE_QUOTE = 15,     /*[SQ]*/
    /** @stable ICU 52 */
    U_WB_DOUBLE_QUOTE = 16,     /*[DQ]*/
    /** @stable ICU 58 */
    U_WB_E_BASE = 17,           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** @stable ICU 58 */
    U_WB_E_BASE_GAZ = 18,       /*[EBG]*/
    /** @stable ICU 58 */
    U_WB_E_MODIFIER = 19,       /*[EM]*/
    /** @stable ICU 58 */
    U_WB_GLUE_AFTER_ZWJ = 20,   /*[GAZ]*/
    /** @stable ICU 58 */
    U_WB_ZWJ = 21,              /*[ZWJ]*/
    /** @stable ICU 62 */
    U_WB_WSEGSPACE = 22,        /*[WSEGSPACE]*/

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UWordBreakValues value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_WB_COUNT = 23
#endif  // U_HIDE_DEPRECATED_API
} UWordBreakValues;

/**
 * Sentence Break constants.
 *
 * @see UCHAR_SENTENCE_BREAK
 * @stable ICU 3.4
 */
typedef enum USentenceBreak {
    /*
     * Note: USentenceBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_SB_<Unicode Sentence_Break value name>
     */

    U_SB_OTHER = 0,             /*[XX]*/
    U_SB_ATERM = 1,             /*[AT]*/
    U_SB_CLOSE = 2,             /*[CL]*/
    U_SB_FORMAT = 3,            /*[FO]*/
    U_SB_LOWER = 4,             /*[LO]*/
    U_SB_NUMERIC = 5,           /*[NU]*/
    U_SB_OLETTER = 6,           /*[LE]*/
    U_SB_SEP = 7,               /*[SE]*/
    U_SB_SP = 8,                /*[SP]*/
    U_SB_STERM = 9,             /*[ST]*/
    U_SB_UPPER = 10,            /*[UP]*/
    U_SB_CR = 11,               /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
    U_SB_EXTEND = 12,           /*[EX]*/
    U_SB_LF = 13,               /*[LF]*/
    U_SB_SCONTINUE = 14,        /*[SC]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal USentenceBreak value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_SB_COUNT = 15
#endif  // U_HIDE_DEPRECATED_API
} USentenceBreak;

/**
 * Line Break constants.
 *
 * @see UCHAR_LINE_BREAK
 * @stable ICU 2.2
 */
typedef enum ULineBreak {
    /*
     * Note: ULineBreak constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_LB_<Unicode Line_Break value name>
     */

    U_LB_UNKNOWN = 0,           /*[XX]*/
    U_LB_AMBIGUOUS = 1,         /*[AI]*/
    U_LB_ALPHABETIC = 2,        /*[AL]*/
    U_LB_BREAK_BOTH = 3,        /*[B2]*/
    U_LB_BREAK_AFTER = 4,       /*[BA]*/
    U_LB_BREAK_BEFORE = 5,      /*[BB]*/
    U_LB_MANDATORY_BREAK = 6,   /*[BK]*/
    U_LB_CONTINGENT_BREAK = 7,  /*[CB]*/
    U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
    U_LB_COMBINING_MARK = 9,    /*[CM]*/
    U_LB_CARRIAGE_RETURN = 10,   /*[CR]*/
    U_LB_EXCLAMATION = 11,       /*[EX]*/
    U_LB_GLUE = 12,              /*[GL]*/
    U_LB_HYPHEN = 13,            /*[HY]*/
    U_LB_IDEOGRAPHIC = 14,       /*[ID]*/
    /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
    U_LB_INSEPARABLE = 15,       /*[IN]*/
    U_LB_INSEPERABLE = U_LB_INSEPARABLE,
    U_LB_INFIX_NUMERIC = 16,     /*[IS]*/
    U_LB_LINE_FEED = 17,         /*[LF]*/
    U_LB_NONSTARTER = 18,        /*[NS]*/
    U_LB_NUMERIC = 19,           /*[NU]*/
    U_LB_OPEN_PUNCTUATION = 20,  /*[OP]*/
    U_LB_POSTFIX_NUMERIC = 21,   /*[PO]*/
    U_LB_PREFIX_NUMERIC = 22,    /*[PR]*/
    U_LB_QUOTATION = 23,         /*[QU]*/
    U_LB_COMPLEX_CONTEXT = 24,   /*[SA]*/
    U_LB_SURROGATE = 25,         /*[SG]*/
    U_LB_SPACE = 26,             /*[SP]*/
    U_LB_BREAK_SYMBOLS = 27,     /*[SY]*/
    U_LB_ZWSPACE = 28,           /*[ZW]*/
    /** @stable ICU 2.6 */
    U_LB_NEXT_LINE = 29,         /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
    /** @stable ICU 2.6 */
    U_LB_WORD_JOINER = 30,       /*[WJ]*/
    /** @stable ICU 3.4 */
    U_LB_H2 = 31,                /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
    /** @stable ICU 3.4 */
    U_LB_H3 = 32,                /*[H3]*/
    /** @stable ICU 3.4 */
    U_LB_JL = 33,                /*[JL]*/
    /** @stable ICU 3.4 */
    U_LB_JT = 34,                /*[JT]*/
    /** @stable ICU 3.4 */
    U_LB_JV = 35,                /*[JV]*/
    /** @stable ICU 4.4 */
    U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
    /** @stable ICU 49 */
    U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
    /** @stable ICU 49 */
    U_LB_HEBREW_LETTER = 38,     /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
    /** @stable ICU 50 */
    U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** @stable ICU 58 */
    U_LB_E_BASE = 40,            /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** @stable ICU 58 */
    U_LB_E_MODIFIER = 41,        /*[EM]*/
    /** @stable ICU 58 */
    U_LB_ZWJ = 42,               /*[ZWJ]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal ULineBreak value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_LB_COUNT = 43
#endif  // U_HIDE_DEPRECATED_API
} ULineBreak;

/**
 * Numeric Type constants.
 *
 * @see UCHAR_NUMERIC_TYPE
 * @stable ICU 2.2
 */
typedef enum UNumericType {
    /*
     * Note: UNumericType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_NT_<Unicode Numeric_Type value name>
     */

    U_NT_NONE,              /*[None]*/
    U_NT_DECIMAL,           /*[de]*/
    U_NT_DIGIT,             /*[di]*/
    U_NT_NUMERIC,           /*[nu]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UNumericType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_NT_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UNumericType;

/**
 * Hangul Syllable Type constants.
 *
 * @see UCHAR_HANGUL_SYLLABLE_TYPE
 * @stable ICU 2.6
 */
typedef enum UHangulSyllableType {
    /*
     * Note: UHangulSyllableType constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_HST_<Unicode Hangul_Syllable_Type value name>
     */

    U_HST_NOT_APPLICABLE,   /*[NA]*/
    U_HST_LEADING_JAMO,     /*[L]*/
    U_HST_VOWEL_JAMO,       /*[V]*/
    U_HST_TRAILING_JAMO,    /*[T]*/
    U_HST_LV_SYLLABLE,      /*[LV]*/
    U_HST_LVT_SYLLABLE,     /*[LVT]*/
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UHangulSyllableType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_HST_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UHangulSyllableType;

/**
 * Indic Positional Category constants.
 *
 * @see UCHAR_INDIC_POSITIONAL_CATEGORY
 * @stable ICU 63
 */
typedef enum UIndicPositionalCategory {
    /*
     * Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_INPC_<Unicode Indic_Positional_Category value name>
     */

    /** @stable ICU 63 */
    U_INPC_NA,
    /** @stable ICU 63 */
    U_INPC_BOTTOM,
    /** @stable ICU 63 */
    U_INPC_BOTTOM_AND_LEFT,
    /** @stable ICU 63 */
    U_INPC_BOTTOM_AND_RIGHT,
    /** @stable ICU 63 */
    U_INPC_LEFT,
    /** @stable ICU 63 */
    U_INPC_LEFT_AND_RIGHT,
    /** @stable ICU 63 */
    U_INPC_OVERSTRUCK,
    /** @stable ICU 63 */
    U_INPC_RIGHT,
    /** @stable ICU 63 */
    U_INPC_TOP,
    /** @stable ICU 63 */
    U_INPC_TOP_AND_BOTTOM,
    /** @stable ICU 63 */
    U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
    /** @stable ICU 63 */
    U_INPC_TOP_AND_LEFT,
    /** @stable ICU 63 */
    U_INPC_TOP_AND_LEFT_AND_RIGHT,
    /** @stable ICU 63 */
    U_INPC_TOP_AND_RIGHT,
    /** @stable ICU 63 */
    U_INPC_VISUAL_ORDER_LEFT,
    /** @stable ICU 66 */
    U_INPC_TOP_AND_BOTTOM_AND_LEFT,
} UIndicPositionalCategory;

/**
 * Indic Syllabic Category constants.
 *
 * @see UCHAR_INDIC_SYLLABIC_CATEGORY
 * @stable ICU 63
 */
typedef enum UIndicSyllabicCategory {
    /*
     * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_INSC_<Unicode Indic_Syllabic_Category value name>
     */

    /** @stable ICU 63 */
    U_INSC_OTHER,
    /** @stable ICU 63 */
    U_INSC_AVAGRAHA,
    /** @stable ICU 63 */
    U_INSC_BINDU,
    /** @stable ICU 63 */
    U_INSC_BRAHMI_JOINING_NUMBER,
    /** @stable ICU 63 */
    U_INSC_CANTILLATION_MARK,
    /** @stable ICU 63 */
    U_INSC_CONSONANT,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_DEAD,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_FINAL,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_HEAD_LETTER,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_INITIAL_POSTFIXED,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_KILLER,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_MEDIAL,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_PLACEHOLDER,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_PRECEDING_REPHA,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_PREFIXED,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_SUBJOINED,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_SUCCEEDING_REPHA,
    /** @stable ICU 63 */
    U_INSC_CONSONANT_WITH_STACKER,
    /** @stable ICU 63 */
    U_INSC_GEMINATION_MARK,
    /** @stable ICU 63 */
    U_INSC_INVISIBLE_STACKER,
    /** @stable ICU 63 */
    U_INSC_JOINER,
    /** @stable ICU 63 */
    U_INSC_MODIFYING_LETTER,
    /** @stable ICU 63 */
    U_INSC_NON_JOINER,
    /** @stable ICU 63 */
    U_INSC_NUKTA,
    /** @stable ICU 63 */
    U_INSC_NUMBER,
    /** @stable ICU 63 */
    U_INSC_NUMBER_JOINER,
    /** @stable ICU 63 */
    U_INSC_PURE_KILLER,
    /** @stable ICU 63 */
    U_INSC_REGISTER_SHIFTER,
    /** @stable ICU 63 */
    U_INSC_SYLLABLE_MODIFIER,
    /** @stable ICU 63 */
    U_INSC_TONE_LETTER,
    /** @stable ICU 63 */
    U_INSC_TONE_MARK,
    /** @stable ICU 63 */
    U_INSC_VIRAMA,
    /** @stable ICU 63 */
    U_INSC_VISARGA,
    /** @stable ICU 63 */
    U_INSC_VOWEL,
    /** @stable ICU 63 */
    U_INSC_VOWEL_DEPENDENT,
    /** @stable ICU 63 */
    U_INSC_VOWEL_INDEPENDENT,
} UIndicSyllabicCategory;

/**
 * Vertical Orientation constants.
 *
 * @see UCHAR_VERTICAL_ORIENTATION
 * @stable ICU 63
 */
typedef enum UVerticalOrientation {
    /*
     * Note: UVerticalOrientation constants are parsed by preparseucd.py.
     * It matches lines like
     *     U_VO_<Unicode Vertical_Orientation value name>
     */

    /** @stable ICU 63 */
    U_VO_ROTATED,
    /** @stable ICU 63 */
    U_VO_TRANSFORMED_ROTATED,
    /** @stable ICU 63 */
    U_VO_TRANSFORMED_UPRIGHT,
    /** @stable ICU 63 */
    U_VO_UPRIGHT,
} UVerticalOrientation;

/**
 * Check a binary Unicode property for a code point.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Important: If ICU is built with UCD files from Unicode versions below 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START&lt;=which&lt;UCHAR_BINARY_LIMIT.
 * @return true or false according to the binary Unicode property value for c.
 *         Also false if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all.
 *
 * @see UProperty
 * @see u_getBinaryPropertySet
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which);

/**
 * Returns true if the property is true for the string.
 * Same as u_hasBinaryProperty(single code point, which)
 * if the string contains exactly one code point.
 *
 * Most properties apply only to single code points.
 * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a>
 * defines several properties of strings.
 *
 * @param s String to test.
 * @param length Length of the string, or negative if NUL-terminated.
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START&lt;=which&lt;UCHAR_BINARY_LIMIT.
 * @return true or false according to the binary Unicode property value for the string.
 *         Also false if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getBinaryPropertySet
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 70
 */
U_CAPI UBool U_EXPORT2
u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which);

/**
 * Returns a frozen USet for a binary property.
 * The library retains ownership over the returned object.
 * Sets an error code if the property number is not one for a binary property.
 *
 * The returned set contains all code points for which the property is true.
 *
 * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the property as a set
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see Unicode::fromUSet
 * @stable ICU 63
 */
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);

/**
 * Check if a code point has the Alphabetic Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
 * This is different from u_isalpha!
 * @param c Code point to test
 * @return true if the code point has the Alphabetic Unicode property, false otherwise
 *
 * @see UCHAR_ALPHABETIC
 * @see u_isalpha
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUAlphabetic(UChar32 c);

/**
 * Check if a code point has the Lowercase Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
 * This is different from u_islower!
 * @param c Code point to test
 * @return true if the code point has the Lowercase Unicode property, false otherwise
 *
 * @see UCHAR_LOWERCASE
 * @see u_islower
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isULowercase(UChar32 c);

/**
 * Check if a code point has the Uppercase Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
 * This is different from u_isupper!
 * @param c Code point to test
 * @return true if the code point has the Uppercase Unicode property, false otherwise
 *
 * @see UCHAR_UPPERCASE
 * @see u_isupper
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUUppercase(UChar32 c);

/**
 * Check if a code point has the White_Space Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
 * This is different from both u_isspace and u_isWhitespace!
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c Code point to test
 * @return true if the code point has the White_Space Unicode property, false otherwise.
 *
 * @see UCHAR_WHITE_SPACE
 * @see u_isWhitespace
 * @see u_isspace
 * @see u_isJavaSpaceChar
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace(UChar32 c);

/**
 * Get the property value for an enumerated or integer Unicode property for a code point.
 * Also returns binary and mask property values.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Sample usage:
 * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
 * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 * @return Numeric value that is directly the property value or,
 *         for enumerated properties, corresponds to the numeric value of the enumerated
 *         constant of the respective property value enumeration type
 *         (cast to enum type if necessary).
 *         Returns 0 or 1 (for false/true) for binary Unicode properties.
 *         Returns a bit-mask for mask properties.
 *         Returns 0 if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all, or not for this code point.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyMinValue
 * @see u_getIntPropertyMaxValue
 * @see u_getIntPropertyMap
 * @see u_getUnicodeVersion
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which);

/**
 * Get the minimum value for an enumerated/integer/binary Unicode property.
 * Can be used together with u_getIntPropertyMaxValue
 * to allocate arrays of UnicodeSet or similar.
 *
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
 * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
 *         0 if the property selector is out of range.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getUnicodeVersion
 * @see u_getIntPropertyMaxValue
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMinValue(UProperty which);

/**
 * Get the maximum value for an enumerated/integer/binary Unicode property.
 * Can be used together with u_getIntPropertyMinValue
 * to allocate arrays of UnicodeSet or similar.
 *
 * Examples for min/max values (for Unicode 3.2):
 *
 * - UCHAR_BIDI_CLASS:    0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
 * - UCHAR_SCRIPT:        0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
 * - UCHAR_IDEOGRAPHIC:   0/1  (false/true)
 *
 * For undefined UProperty constant values, min/max values will be 0/-1.
 *
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
 * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
 *         <=0 if the property selector is out of range.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getUnicodeVersion
 * @see u_getIntPropertyMaxValue
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which);

/**
 * Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
 * The library retains ownership over the returned object.
 * Sets an error code if the property number is not one for an "int property".
 *
 * The returned object maps all Unicode code points to their values for that property.
 * For documentation of the integer values see u_getIntPropertyValue().
 *
 * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the property as a map
 * @see UProperty
 * @see u_getIntPropertyValue
 * @stable ICU 63
 */
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);

/**
 * Get the numeric value for a Unicode code point as defined in the
 * Unicode Character Database.
 *
 * A "double" return type is necessary because
 * some numeric values are fractions, negative, or too large for int32_t.
 *
 * For characters without any numeric values in the Unicode Character Database,
 * this function will return U_NO_NUMERIC_VALUE.
 * Note: This is different from the Unicode Standard which specifies NaN as the default value.
 * (NaN is not available on all platforms.)
 *
 * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
 * also supports negative values, large values, and fractions,
 * while Java's getNumericValue() returns values 10..35 for ASCII letters.
 *
 * @param c Code point to get the numeric value for.
 * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
 *
 * @see U_NO_NUMERIC_VALUE
 * @stable ICU 2.2
 */
U_CAPI double U_EXPORT2
u_getNumericValue(UChar32 c);

/**
 * Special value that is returned by u_getNumericValue when
 * no numeric value is defined for a code point.
 *
 * @see u_getNumericValue
 * @stable ICU 2.2
 */
#define U_NO_NUMERIC_VALUE ((double)-123456789.)

/**
 * Determines whether the specified code point has the general category "Ll"
 * (lowercase letter).
 *
 * Same as java.lang.Character.isLowerCase().
 *
 * This misses some characters that are also lowercase but
 * have a different general category value.
 * In order to include those, use UCHAR_LOWERCASE.
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is an Ll lowercase letter
 *
 * @see UCHAR_LOWERCASE
 * @see u_isupper
 * @see u_istitle
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_islower(UChar32 c);

/**
 * Determines whether the specified code point has the general category "Lu"
 * (uppercase letter).
 *
 * Same as java.lang.Character.isUpperCase().
 *
 * This misses some characters that are also uppercase but
 * have a different general category value.
 * In order to include those, use UCHAR_UPPERCASE.
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is an Lu uppercase letter
 *
 * @see UCHAR_UPPERCASE
 * @see u_islower
 * @see u_istitle
 * @see u_tolower
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isupper(UChar32 c);

/**
 * Determines whether the specified code point is a titlecase letter.
 * True for general category "Lt" (titlecase letter).
 *
 * Same as java.lang.Character.isTitleCase().
 *
 * @param c the code point to be tested
 * @return true if the code point is an Lt titlecase letter
 *
 * @see u_isupper
 * @see u_islower
 * @see u_totitle
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_istitle(UChar32 c);

/**
 * Determines whether the specified code point is a digit character according to Java.
 * True for characters with general category "Nd" (decimal digit numbers).
 * Beginning with Unicode 4, this is the same as
 * testing for the Numeric_Type of Decimal.
 *
 * Same as java.lang.Character.isDigit().
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a digit character according to Character.isDigit()
 *
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isdigit(UChar32 c);

/**
 * Determines whether the specified code point is a letter character.
 * True for general categories "L" (letters).
 *
 * Same as java.lang.Character.isLetter().
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a letter character
 *
 * @see u_isdigit
 * @see u_isalnum
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isalpha(UChar32 c);

/**
 * Determines whether the specified code point is an alphanumeric character
 * (letter or digit) according to Java.
 * True for characters with general categories
 * "L" (letters) and "Nd" (decimal digit numbers).
 *
 * Same as java.lang.Character.isLetterOrDigit().
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is an alphanumeric character according to Character.isLetterOrDigit()
 *
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isalnum(UChar32 c);

/**
 * Determines whether the specified code point is a hexadecimal digit.
 * This is equivalent to u_digit(c, 16)>=0.
 * True for characters with general category "Nd" (decimal digit numbers)
 * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
 * (That is, for letters with code points
 * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
 *
 * In order to narrow the definition of hexadecimal digits to only ASCII
 * characters, use (c<=0x7f && u_isxdigit(c)).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a hexadecimal digit
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isxdigit(UChar32 c);

/**
 * Determines whether the specified code point is a punctuation character.
 * True for characters with general categories "P" (punctuation).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a punctuation character
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_ispunct(UChar32 c);

/**
 * Determines whether the specified code point is a "graphic" character
 * (printable, excluding spaces).
 * true for all characters except those with general categories
 * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
 * "Cn" (unassigned), and "Z" (separators).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a "graphic" character
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isgraph(UChar32 c);

/**
 * Determines whether the specified code point is a "blank" or "horizontal space",
 * a character that visibly separates words on a line.
 * The following are equivalent definitions:
 *
 * true for Unicode White_Space characters except for "vertical space controls"
 * where "vertical space controls" are the following characters:
 * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
 *
 * same as
 *
 * true for U+0009 (TAB) and characters with general category "Zs" (space separators).
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a "blank"
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isblank(UChar32 c);

/**
 * Determines whether the specified code point is "defined",
 * which usually means that it is assigned a character.
 * True for general categories other than "Cn" (other, not assigned),
 * i.e., true for all code points mentioned in UnicodeData.txt.
 *
 * Note that non-character code points (e.g., U+FDD0) are not "defined"
 * (they are Cn), but surrogate code points are "defined" (Cs).
 *
 * Same as java.lang.Character.isDefined().
 *
 * @param c the code point to be tested
 * @return true if the code point is assigned a character
 *
 * @see u_isdigit
 * @see u_isalpha
 * @see u_isalnum
 * @see u_isupper
 * @see u_islower
 * @see u_istitle
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isdefined(UChar32 c);

/**
 * Determines if the specified character is a space character or not.
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c    the character to be tested
 * @return  true if the character is a space character; false otherwise.
 *
 * @see u_isJavaSpaceChar
 * @see u_isWhitespace
 * @see u_isUWhiteSpace
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isspace(UChar32 c);

/**
 * Determine if the specified code point is a space character according to Java.
 * True for characters with general categories "Z" (separators),
 * which does not include control codes (e.g., TAB or Line Feed).
 *
 * Same as java.lang.Character.isSpaceChar().
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c the code point to be tested
 * @return true if the code point is a space character according to Character.isSpaceChar()
 *
 * @see u_isspace
 * @see u_isWhitespace
 * @see u_isUWhiteSpace
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isJavaSpaceChar(UChar32 c);

/**
 * Determines if the specified code point is a whitespace character according to Java/ICU.
 * A character is considered to be a Java whitespace character if and only
 * if it satisfies one of the following criteria:
 *
 * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
 *      also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
 * - It is U+0009 HORIZONTAL TABULATION.
 * - It is U+000A LINE FEED.
 * - It is U+000B VERTICAL TABULATION.
 * - It is U+000C FORM FEED.
 * - It is U+000D CARRIAGE RETURN.
 * - It is U+001C FILE SEPARATOR.
 * - It is U+001D GROUP SEPARATOR.
 * - It is U+001E RECORD SEPARATOR.
 * - It is U+001F UNIT SEPARATOR.
 *
 * This API tries to sync with the semantics of Java's
 * java.lang.Character.isWhitespace(), but it may not return
 * the exact same results because of the Unicode version
 * difference.
 *
 * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
 * See http://www.unicode.org/versions/Unicode4.0.1/
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c the code point to be tested
 * @return true if the code point is a whitespace character according to Java/ICU
 *
 * @see u_isspace
 * @see u_isJavaSpaceChar
 * @see u_isUWhiteSpace
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isWhitespace(UChar32 c);

/**
 * Determines whether the specified code point is a control character
 * (as defined by this function).
 * A control character is one of the following:
 * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
 * - U_CONTROL_CHAR (Cc)
 * - U_FORMAT_CHAR (Cf)
 * - U_LINE_SEPARATOR (Zl)
 * - U_PARAGRAPH_SEPARATOR (Zp)
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a control character
 *
 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
 * @see u_isprint
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_iscntrl(UChar32 c);

/**
 * Determines whether the specified code point is an ISO control code.
 * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
 *
 * Same as java.lang.Character.isISOControl().
 *
 * @param c the code point to be tested
 * @return true if the code point is an ISO control code
 *
 * @see u_iscntrl
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isISOControl(UChar32 c);

/**
 * Determines whether the specified code point is a printable character.
 * True for general categories <em>other</em> than "C" (controls).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a printable character
 *
 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
 * @see u_iscntrl
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isprint(UChar32 c);

/**
 * Non-standard: Determines whether the specified code point is a base character.
 * True for general categories "L" (letters), "N" (numbers),
 * "Mc" (spacing combining marks), and "Me" (enclosing marks).
 *
 * Note that this is different from the Unicode Standard definition in
 * chapter 3.6, conformance clause D51 “Base character”,
 * which defines base characters as the code points with general categories
 * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).
 *
 * @param c the code point to be tested
 * @return true if the code point is a base character according to this function
 *
 * @see u_isalpha
 * @see u_isdigit
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isbase(UChar32 c);

/**
 * Returns the bidirectional category value for the code point,
 * which is used in the Unicode bidirectional algorithm
 * (UAX #9 http://www.unicode.org/reports/tr9/).
 * Note that some <em>unassigned</em> code points have bidi values
 * of R or AL because they are in blocks that are reserved
 * for Right-To-Left scripts.
 *
 * Same as java.lang.Character.getDirectionality()
 *
 * @param c the code point to be tested
 * @return the bidirectional category (UCharDirection) value
 *
 * @see UCharDirection
 * @stable ICU 2.0
 */
U_CAPI UCharDirection U_EXPORT2
u_charDirection(UChar32 c);

/**
 * Determines whether the code point has the Bidi_Mirrored property.
 * This property is set for characters that are commonly used in
 * Right-To-Left contexts and need to be displayed with a "mirrored"
 * glyph.
 *
 * Same as java.lang.Character.isMirrored().
 * Same as UCHAR_BIDI_MIRRORED
 *
 * @param c the code point to be tested
 * @return true if the character has the Bidi_Mirrored property
 *
 * @see UCHAR_BIDI_MIRRORED
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isMirrored(UChar32 c);

/**
 * Maps the specified character to a "mirror-image" character.
 * For characters with the Bidi_Mirrored property, implementations
 * sometimes need a "poor man's" mapping to another Unicode
 * character (code point) such that the default glyph may serve
 * as the mirror-image of the default glyph of the specified
 * character. This is useful for text conversion to and from
 * codepages with visual order, and for displays without glyph
 * selection capabilities.
 *
 * @param c the code point to be mapped
 * @return another Unicode code point that may serve as a mirror-image
 *         substitute, or c itself if there is no such mapping or c
 *         does not have the Bidi_Mirrored property
 *
 * @see UCHAR_BIDI_MIRRORED
 * @see u_isMirrored
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_charMirror(UChar32 c);

/**
 * Maps the specified character to its paired bracket character.
 * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
 * Otherwise c itself is returned.
 * See http://www.unicode.org/reports/tr9/
 *
 * @param c the code point to be mapped
 * @return the paired bracket code point,
 *         or c itself if there is no such mapping
 *         (Bidi_Paired_Bracket_Type=None)
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * @see u_charMirror
 * @stable ICU 52
 */
U_CAPI UChar32 U_EXPORT2
u_getBidiPairedBracket(UChar32 c);

/**
 * Returns the general category value for the code point.
 *
 * Same as java.lang.Character.getType().
 *
 * @param c the code point to be tested
 * @return the general category (UCharCategory) value
 *
 * @see UCharCategory
 * @stable ICU 2.0
 */
U_CAPI int8_t U_EXPORT2
u_charType(UChar32 c);

/**
 * Get a single-bit bit set for the general category of a character.
 * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
 * Same as U_MASK(u_charType(c)).
 *
 * @param c the code point to be tested
 * @return a single-bit mask corresponding to the general category (UCharCategory) value
 *
 * @see u_charType
 * @see UCharCategory
 * @see U_GC_CN_MASK
 * @stable ICU 2.1
 */
#define U_GET_GC_MASK(c) U_MASK(u_charType(c))

/**
 * Callback from u_enumCharTypes(), is called for each contiguous range
 * of code points c (where start<=c<limit)
 * with the same Unicode general category ("character type").
 *
 * The callback function can stop the enumeration by returning false.
 *
 * @param context an opaque pointer, as passed into utrie_enum()
 * @param start the first code point in a contiguous range with value
 * @param limit one past the last code point in a contiguous range with value
 * @param type the general category for all code points in [start..limit[
 * @return false to stop the enumeration
 *
 * @stable ICU 2.1
 * @see UCharCategory
 * @see u_enumCharTypes
 */
typedef UBool U_CALLCONV
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);

/**
 * Enumerate efficiently all code points with their Unicode general categories.
 *
 * This is useful for building data structures (e.g., UnicodeSet's),
 * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
 *
 * For each contiguous range of code points with a given general category ("character type"),
 * the UCharEnumTypeRange function is called.
 * Adjacent ranges have different types.
 * The Unicode Standard guarantees that the numeric value of the type is 0..31.
 *
 * @param enumRange a pointer to a function that is called for each contiguous range
 *                  of code points with the same general category
 * @param context an opaque pointer that is passed on to the callback function
 *
 * @stable ICU 2.1
 * @see UCharCategory
 * @see UCharEnumTypeRange
 */
U_CAPI void U_EXPORT2
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);

#if !UCONFIG_NO_NORMALIZATION

/**
 * Returns the combining class of the code point as specified in UnicodeData.txt.
 *
 * @param c the code point of the character
 * @return the combining class of the character
 * @stable ICU 2.0
 */
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c);

#endif

/**
 * Returns the decimal digit value of a decimal digit character.
 * Such characters have the general category "Nd" (decimal digit numbers)
 * and a Numeric_Type of Decimal.
 *
 * Unlike ICU releases before 2.6, no digit values are returned for any
 * Han characters because Han number characters are often used with a special
 * Chinese-style number format (with characters for powers of 10 in between)
 * instead of in decimal-positional notation.
 * Unicode 4 explicitly assigns Han number characters the Numeric_Type
 * Numeric instead of Decimal.
 * See Jitterbug 1483 for more details.
 *
 * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
 * for complete numeric Unicode properties.
 *
 * @param c the code point for which to get the decimal digit value
 * @return the decimal digit value of c,
 *         or -1 if c is not a decimal digit character
 *
 * @see u_getNumericValue
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
u_charDigitValue(UChar32 c);

/**
 * Returns the Unicode allocation block that contains the character.
 *
 * @param c the code point to be tested
 * @return the block value (UBlockCode) for c
 *
 * @see UBlockCode
 * @stable ICU 2.0
 */
U_CAPI UBlockCode U_EXPORT2
ublock_getCode(UChar32 c);

/**
 * Retrieve the name of a Unicode character.
 * Depending on <code>nameChoice</code>, the character name written
 * into the buffer is the "modern" name or the name that was defined
 * in Unicode version 1.0.
 * The name contains only "invariant" characters
 * like A-Z, 0-9, space, and '-'.
 * Unicode 1.0 names are only retrieved if they are different from the modern
 * names and if the data file contains the data for them. gennames may or may
 * not be called with a command line option to include 1.0 names in unames.dat.
 *
 * @param code The character (code point) for which to get the name.
 *             It must be <code>0<=code<=0x10ffff</code>.
 * @param nameChoice Selector for which name to get.
 * @param buffer Destination address for copying the name.
 *               The name will always be zero-terminated.
 *               If there is no name, then the buffer will be set to the empty string.
 * @param bufferLength <code>==sizeof(buffer)</code>
 * @param pErrorCode Pointer to a UErrorCode variable;
 *        check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
 *        returns.
 * @return The length of the name, or 0 if there is no name for this character.
 *         If the bufferLength is less than or equal to the length, then the buffer
 *         contains the truncated name and the returned length indicates the full
 *         length of the name.
 *         The length does not include the zero-termination.
 *
 * @see UCharNameChoice
 * @see u_charFromName
 * @see u_enumCharNames
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
u_charName(UChar32 code, UCharNameChoice nameChoice,
           char *buffer, int32_t bufferLength,
           UErrorCode *pErrorCode);

#ifndef U_HIDE_DEPRECATED_API
/**
 * Returns an empty string.
 * Used to return the ISO 10646 comment for a character.
 * The Unicode ISO_Comment property is deprecated and has no values.
 *
 * @param c The character (code point) for which to get the ISO comment.
 *             It must be <code>0<=c<=0x10ffff</code>.
 * @param dest Destination address for copying the comment.
 *             The comment will be zero-terminated if possible.
 *             If there is no comment, then the buffer will be set to the empty string.
 * @param destCapacity <code>==sizeof(dest)</code>
 * @param pErrorCode Pointer to a UErrorCode variable;
 *        check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
 *        returns.
 * @return 0
 *
 * @deprecated ICU 49
 */
U_DEPRECATED int32_t U_EXPORT2
u_getISOComment(UChar32 c,
                char *dest, int32_t destCapacity,
                UErrorCode *pErrorCode);
#endif  /* U_HIDE_DEPRECATED_API */

/**
 * Find a Unicode character by its name and return its code point value.
 * The name is matched exactly and completely.
 * If the name does not correspond to a code point, <i>pErrorCode</i>
 * is set to <code>U_INVALID_CHAR_FOUND</code>.
 * A Unicode 1.0 name is matched only if it differs from the modern name.
 * Unicode names are all uppercase. Extended names are lowercase followed
 * by an uppercase hexadecimal number, and within angle brackets.
 *
 * @param nameChoice Selector for which name to match.
 * @param name The name to match.
 * @param pErrorCode Pointer to a UErrorCode variable
 * @return The Unicode value of the code point with the given name,
 *         or an undefined value if there is no such code point.
 *
 * @see UCharNameChoice
 * @see u_charName
 * @see u_enumCharNames
 * @stable ICU 1.7
 */
U_CAPI UChar32 U_EXPORT2
u_charFromName(UCharNameChoice nameChoice,
               const char *name,
               UErrorCode *pErrorCode);

/**
 * Type of a callback function for u_enumCharNames() that gets called
 * for each Unicode character with the code point value and
 * the character name.
 * If such a function returns false, then the enumeration is stopped.
 *
 * @param context The context pointer that was passed to u_enumCharNames().
 * @param code The Unicode code point for the character with this name.
 * @param nameChoice Selector for which kind of names is enumerated.
 * @param name The character's name, zero-terminated.
 * @param length The length of the name.
 * @return true if the enumeration should continue, false to stop it.
 *
 * @see UCharNameChoice
 * @see u_enumCharNames
 * @stable ICU 1.7
 */
typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
                               UChar32 code,
                               UCharNameChoice nameChoice,
                               const char *name,
                               int32_t length);

/**
 * Enumerate all assigned Unicode characters between the start and limit
 * code points (start inclusive, limit exclusive) and call a function
 * for each, passing the code point value and the character name.
 * For Unicode 1.0 names, only those are enumerated that differ from the
 * modern names.
 *
 * @param start The first code point in the enumeration range.
 * @param limit One more than the last code point in the enumeration range
 *              (the first one after the range).
 * @param fn The function that is to be called for each character name.
 * @param context An arbitrary pointer that is passed to the function.
 * @param nameChoice Selector for which kind of names to enumerate.
 * @param pErrorCode Pointer to a UErrorCode variable
 *
 * @see UCharNameChoice
 * @see UEnumCharNamesFn
 * @see u_charName
 * @see u_charFromName
 * @stable ICU 1.7
 */
U_CAPI void U_EXPORT2
u_enumCharNames(UChar32 start, UChar32 limit,
                UEnumCharNamesFn *fn,
                void *context,
                UCharNameChoice nameChoice,
                UErrorCode *pErrorCode);

/**
 * Return the Unicode name for a given property, as given in the
 * Unicode database file PropertyAliases.txt.
 *
 * In addition, this function maps the property
 * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
 * "General_Category_Mask".  These names are not in
 * PropertyAliases.txt.
 *
 * @param property UProperty selector other than UCHAR_INVALID_CODE.
 *         If out of range, NULL is returned.
 *
 * @param nameChoice selector for which name to get.  If out of range,
 *         NULL is returned.  All properties have a long name.  Most
 *         have a short name, but some do not.  Unicode allows for
 *         additional names; if present these will be returned by
 *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
 *
 * @return a pointer to the name, or NULL if either the
 *         property or the nameChoice is out of range.  If a given
 *         nameChoice returns NULL, then all larger values of
 *         nameChoice will return NULL, with one exception: if NULL is
 *         returned for U_SHORT_PROPERTY_NAME, then
 *         U_LONG_PROPERTY_NAME (and higher) may still return a
 *         non-NULL value.  The returned pointer is valid until
 *         u_cleanup() is called.
 *
 * @see UProperty
 * @see UPropertyNameChoice
 * @stable ICU 2.4
 */
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice);

/**
 * Return the UProperty enum for a given property name, as specified
 * in the Unicode database file PropertyAliases.txt.  Short, long, and
 * any other variants are recognized.
 *
 * In addition, this function maps the synthetic names "gcm" /
 * "General_Category_Mask" to the property
 * UCHAR_GENERAL_CATEGORY_MASK.  These names are not in
 * PropertyAliases.txt.
 *
 * @param alias the property name to be matched.  The name is compared
 *         using "loose matching" as described in PropertyAliases.txt.
 *
 * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
 *         does not match any property.
 *
 * @see UProperty
 * @stable ICU 2.4
 */
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias);

/**
 * Return the Unicode name for a given property value, as given in the
 * Unicode database file PropertyValueAliases.txt.
 *
 * Note: Some of the names in PropertyValueAliases.txt can only be
 * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
 * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
 *
 * @param property UProperty selector constant.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 *        If out of range, NULL is returned.
 *
 * @param value selector for a value for the given property.  If out
 *         of range, NULL is returned.  In general, valid values range
 *         from 0 up to some maximum.  There are a few exceptions:
 *         (1.) UCHAR_BLOCK values begin at the non-zero value
 *         UBLOCK_BASIC_LATIN.  (2.)  UCHAR_CANONICAL_COMBINING_CLASS
 *         values are not contiguous and range from 0..240.  (3.)
 *         UCHAR_GENERAL_CATEGORY_MASK values are not values of
 *         UCharCategory, but rather mask values produced by
 *         U_GET_GC_MASK().  This allows grouped categories such as
 *         [:L:] to be represented.  Mask values range
 *         non-contiguously from 1..U_GC_P_MASK.
 *
 * @param nameChoice selector for which name to get.  If out of range,
 *         NULL is returned.  All values have a long name.  Most have
 *         a short name, but some do not.  Unicode allows for
 *         additional names; if present these will be returned by
 *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...

 * @return a pointer to the name, or NULL if either the
 *         property or the nameChoice is out of range.  If a given
 *         nameChoice returns NULL, then all larger values of
 *         nameChoice will return NULL, with one exception: if NULL is
 *         returned for U_SHORT_PROPERTY_NAME, then
 *         U_LONG_PROPERTY_NAME (and higher) may still return a
 *         non-NULL value.  The returned pointer is valid until
 *         u_cleanup() is called.
 *
 * @see UProperty
 * @see UPropertyNameChoice
 * @stable ICU 2.4
 */
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice);

/**
 * Return the property value integer for a given value name, as
 * specified in the Unicode database file PropertyValueAliases.txt.
 * Short, long, and any other variants are recognized.
 *
 * Note: Some of the names in PropertyValueAliases.txt will only be
 * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
 * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
 *
 * @param property UProperty selector constant.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 *        If out of range, UCHAR_INVALID_CODE is returned.
 *
 * @param alias the value name to be matched.  The name is compared
 *         using "loose matching" as described in
 *         PropertyValueAliases.txt.
 *
 * @return a value integer or UCHAR_INVALID_CODE if the given name
 *         does not match any value of the given property, or if the
 *         property is invalid.  Note: UCHAR_GENERAL_CATEGORY_MASK values
 *         are not values of UCharCategory, but rather mask values
 *         produced by U_GET_GC_MASK().  This allows grouped
 *         categories such as [:L:] to be represented.
 *
 * @see UProperty
 * @stable ICU 2.4
 */
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
                       const char* alias);

/**
 * Determines if the specified character is permissible as the
 * first character in an identifier according to Unicode
 * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
 * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
 *
 * Same as java.lang.Character.isUnicodeIdentifierStart().
 * Same as UCHAR_ID_START
 *
 * @param c the code point to be tested
 * @return true if the code point may start an identifier
 *
 * @see UCHAR_ID_START
 * @see u_isalpha
 * @see u_isIDPart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isIDStart(UChar32 c);

/**
 * Determines if the specified character is permissible
 * in an identifier according to Java.
 * True for characters with general categories "L" (letters),
 * "Nl" (letter numbers), "Nd" (decimal digits),
 * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
 * u_isIDIgnorable(c).
 *
 * Same as java.lang.Character.isUnicodeIdentifierPart().
 * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
 * except that Unicode recommends to ignore Cf which is less than
 * u_isIDIgnorable(c).
 *
 * @param c the code point to be tested
 * @return true if the code point may occur in an identifier according to Java
 *
 * @see UCHAR_ID_CONTINUE
 * @see u_isIDStart
 * @see u_isIDIgnorable
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isIDPart(UChar32 c);

/**
 * Determines if the specified character should be regarded
 * as an ignorable character in an identifier,
 * according to Java.
 * True for characters with general category "Cf" (format controls) as well as
 * non-whitespace ISO controls
 * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
 *
 * Same as java.lang.Character.isIdentifierIgnorable().
 *
 * Note that Unicode just recommends to ignore Cf (format controls).
 *
 * @param c the code point to be tested
 * @return true if the code point is ignorable in identifiers according to Java
 *
 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
 * @see u_isIDStart
 * @see u_isIDPart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isIDIgnorable(UChar32 c);

/**
 * Determines if the specified character is permissible as the
 * first character in a Java identifier.
 * In addition to u_isIDStart(c), true for characters with
 * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
 *
 * Same as java.lang.Character.isJavaIdentifierStart().
 *
 * @param c the code point to be tested
 * @return true if the code point may start a Java identifier
 *
 * @see     u_isJavaIDPart
 * @see     u_isalpha
 * @see     u_isIDStart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isJavaIDStart(UChar32 c);

/**
 * Determines if the specified character is permissible
 * in a Java identifier.
 * In addition to u_isIDPart(c), true for characters with
 * general category "Sc" (currency symbols).
 *
 * Same as java.lang.Character.isJavaIdentifierPart().
 *
 * @param c the code point to be tested
 * @return true if the code point may occur in a Java identifier
 *
 * @see     u_isIDIgnorable
 * @see     u_isJavaIDStart
 * @see     u_isalpha
 * @see     u_isdigit
 * @see     u_isIDPart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isJavaIDPart(UChar32 c);

/**
 * The given character is mapped to its lowercase equivalent according to
 * UnicodeData.txt; if the character has no lowercase equivalent, the character
 * itself is returned.
 *
 * Same as java.lang.Character.toLowerCase().
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @return the Simple_Lowercase_Mapping of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_tolower(UChar32 c);

/**
 * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
 * if the character has no uppercase equivalent, the character itself is
 * returned.
 *
 * Same as java.lang.Character.toUpperCase().
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @return the Simple_Uppercase_Mapping of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_toupper(UChar32 c);

/**
 * The given character is mapped to its titlecase equivalent
 * according to UnicodeData.txt;
 * if none is defined, the character itself is returned.
 *
 * Same as java.lang.Character.toTitleCase().
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @return the Simple_Titlecase_Mapping of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_totitle(UChar32 c);

/**
 * The given character is mapped to its case folding equivalent according to
 * UnicodeData.txt and CaseFolding.txt;
 * if the character has no case folding equivalent, the character
 * itself is returned.
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return the Simple_Case_Folding of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_foldCase(UChar32 c, uint32_t options);

/**
 * Returns the decimal digit value of the code point in the
 * specified radix.
 *
 * If the radix is not in the range <code>2<=radix<=36</code> or if the
 * value of <code>c</code> is not a valid digit in the specified
 * radix, <code>-1</code> is returned. A character is a valid digit
 * if at least one of the following is true:
 * <ul>
 * <li>The character has a decimal digit value.
 *     Such characters have the general category "Nd" (decimal digit numbers)
 *     and a Numeric_Type of Decimal.
 *     In this case the value is the character's decimal digit value.</li>
 * <li>The character is one of the uppercase Latin letters
 *     <code>'A'</code> through <code>'Z'</code>.
 *     In this case the value is <code>c-'A'+10</code>.</li>
 * <li>The character is one of the lowercase Latin letters
 *     <code>'a'</code> through <code>'z'</code>.
 *     In this case the value is <code>ch-'a'+10</code>.</li>
 * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
 *     as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
 *     are recognized.</li>
 * </ul>
 *
 * Same as java.lang.Character.digit().
 *
 * @param   ch      the code point to be tested.
 * @param   radix   the radix.
 * @return  the numeric value represented by the character in the
 *          specified radix,
 *          or -1 if there is no value or if the value exceeds the radix.
 *
 * @see     UCHAR_NUMERIC_TYPE
 * @see     u_forDigit
 * @see     u_charDigitValue
 * @see     u_isdigit
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
u_digit(UChar32 ch, int8_t radix);

/**
 * Determines the character representation for a specific digit in
 * the specified radix. If the value of <code>radix</code> is not a
 * valid radix, or the value of <code>digit</code> is not a valid
 * digit in the specified radix, the null character
 * (<code>U+0000</code>) is returned.
 * <p>
 * The <code>radix</code> argument is valid if it is greater than or
 * equal to 2 and less than or equal to 36.
 * The <code>digit</code> argument is valid if
 * <code>0 <= digit < radix</code>.
 * <p>
 * If the digit is less than 10, then
 * <code>'0' + digit</code> is returned. Otherwise, the value
 * <code>'a' + digit - 10</code> is returned.
 *
 * Same as java.lang.Character.forDigit().
 *
 * @param   digit   the number to convert to a character.
 * @param   radix   the radix.
 * @return  the <code>char</code> representation of the specified digit
 *          in the specified radix.
 *
 * @see     u_digit
 * @see     u_charDigitValue
 * @see     u_isdigit
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_forDigit(int32_t digit, int8_t radix);

/**
 * Get the "age" of the code point.
 * The "age" is the Unicode version when the code point was first
 * designated (as a non-character or for Private Use)
 * or assigned a character.
 * This can be useful to avoid emitting code points to receiving
 * processes that do not accept newer characters.
 * The data is from the UCD file DerivedAge.txt.
 *
 * @param c The code point.
 * @param versionArray The Unicode version number array, to be filled in.
 *
 * @stable ICU 2.1
 */
U_CAPI void U_EXPORT2
u_charAge(UChar32 c, UVersionInfo versionArray);

/**
 * Gets the Unicode version information.
 * The version array is filled in with the version information
 * for the Unicode standard that is currently used by ICU.
 * For example, Unicode version 3.1.1 is represented as an array with
 * the values { 3, 1, 1, 0 }.
 *
 * @param versionArray an output array that will be filled in with
 *                     the Unicode version number
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
u_getUnicodeVersion(UVersionInfo versionArray);

#if !UCONFIG_NO_NORMALIZATION
/**
 * Get the FC_NFKC_Closure property string for a character.
 * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
 * or for "FNC": http://www.unicode.org/reports/tr15/
 *
 * @param c The character (code point) for which to get the FC_NFKC_Closure string.
 *             It must be <code>0<=c<=0x10ffff</code>.
 * @param dest Destination address for copying the string.
 *             The string will be zero-terminated if possible.
 *             If there is no FC_NFKC_Closure string,
 *             then the buffer will be set to the empty string.
 * @param destCapacity <code>==sizeof(dest)</code>
 * @param pErrorCode Pointer to a UErrorCode variable.
 * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
 *         If the destCapacity is less than or equal to the length, then the buffer
 *         contains the truncated name and the returned length indicates the full
 *         length of the name.
 *         The length does not include the zero-termination.
 *
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);

#endif


U_CDECL_END

#endif /*_UCHAR*/
/*eof*/
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       // © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1999-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
 *  ucnv.h:
 *  External APIs for the ICU's codeset conversion library
 *  Bertrand A. Damiba
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   04/04/99    helena      Fixed internal header inclusion.
 *   05/11/00    helena      Added setFallback and usesFallback APIs.
 *   06/29/2000  helena      Major rewrite of the callback APIs.
 *   12/07/2000  srl         Update of documentation
 */

/**
 * \file
 * \brief C API: Character conversion
 *
 * <h2>Character Conversion C API</h2>
 *
 * <p>This API is used to convert codepage or character encoded data to and
 * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
 * converter, you can get its properties, set options, convert your data and
 * close the converter.</p>
 *
 * <p>Since many software programs recognize different converter names for
 * different types of converters, there are other functions in this API to
 * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
 * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
 * more frequently used alias functions to get this information.</p>
 *
 * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
 * its default behavior is to use a substitution character to replace the
 * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
 * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
 * many other callback actions that can be used instead of a character substitution.</p>
 *
 * <p>More information about this API can be found in our
 * <a href="https://unicode-org.github.io/icu/userguide/conversion/">User Guide</a>.</p>
 */

#ifndef UCNV_H
#define UCNV_H

#include "unicode/ucnv_err.h"
#include "unicode/uenum.h"

#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
#endif   // U_SHOW_CPLUSPLUS_API

#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)

#define USET_DEFINED

/**
 * USet is the C API type corresponding to C++ class UnicodeSet.
 * It is forward-declared here to avoid including unicode/uset.h file if related
 * conversion APIs are not used.
 *
 * @see ucnv_getUnicodeSet
 * @stable ICU 2.4
 */
typedef struct USet USet;

#endif

#if !UCONFIG_NO_CONVERSION

U_CDECL_BEGIN

/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)

/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
#define  UCNV_SI 0x0F
/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
#define  UCNV_SO 0x0E

/**
 * Enum for specifying basic types of converters
 * @see ucnv_getType
 * @stable ICU 2.0
 */
typedef enum {
    /** @stable ICU 2.0 */
    UCNV_UNSUPPORTED_CONVERTER = -1,
    /** @stable ICU 2.0 */
    UCNV_SBCS = 0,
    /** @stable ICU 2.0 */
    UCNV_DBCS = 1,
    /** @stable ICU 2.0 */
    UCNV_MBCS = 2,
    /** @stable ICU 2.0 */
    UCNV_LATIN_1 = 3,
    /** @stable ICU 2.0 */
    UCNV_UTF8 = 4,
    /** @stable ICU 2.0 */
    UCNV_UTF16_BigEndian = 5,
    /** @stable ICU 2.0 */
    UCNV_UTF16_LittleEndian = 6,
    /** @stable ICU 2.0 */
    UCNV_UTF32_BigEndian = 7,
    /** @stable ICU 2.0 */
    UCNV_UTF32_LittleEndian = 8,
    /** @stable ICU 2.0 */
    UCNV_EBCDIC_STATEFUL = 9,
    /** @stable ICU 2.0 */
    UCNV_ISO_2022 = 10,

    /** @stable ICU 2.0 */
    UCNV_LMBCS_1 = 11,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_2,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_3,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_4,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_5,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_6,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_8,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_11,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_16,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_17,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_18,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_19,
    /** @stable ICU 2.0 */
    UCNV_LMBCS_LAST = UCNV_LMBCS_19,
    /** @stable ICU 2.0 */
    UCNV_HZ,
    /** @stable ICU 2.0 */
    UCNV_SCSU,
    /** @stable ICU 2.0 */
    UCNV_ISCII,
    /** @stable ICU 2.0 */
    UCNV_US_ASCII,
    /** @stable ICU 2.0 */
    UCNV_UTF7,
    /** @stable ICU 2.2 */
    UCNV_BOCU1,
    /** @stable ICU 2.2 */
    UCNV_UTF16,
    /** @stable ICU 2.2 */
    UCNV_UTF32,
    /** @stable ICU 2.2 */
    UCNV_CESU8,
    /** @stable ICU 2.4 */
    UCNV_IMAP_MAILBOX,
    /** @stable ICU 4.8 */
    UCNV_COMPOUND_TEXT,

    /* Number of converter types for which we have conversion routines. */
    UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
} UConverterType;

/**
 * Enum for specifying which platform a converter ID refers to.
 * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
 *
 * @see ucnv_getPlatform
 * @see ucnv_openCCSID
 * @see ucnv_getCCSID
 * @stable ICU 2.0
 */
typedef enum {
    UCNV_UNKNOWN = -1,
    UCNV_IBM = 0
} UConverterPlatform;

/**
 * Function pointer for error callback in the codepage to unicode direction.
 * Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).
 * @param context Pointer to the callback's private data
 * @param args Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param pErrorCode    ICU error code in/out parameter.
 *                      For converter callback functions, set to a conversion error
 *                      before the call, and the callback may reset it to U_ZERO_ERROR.
 * @see ucnv_setToUCallBack
 * @see UConverterToUnicodeArgs
 * @stable ICU 2.0
 */
typedef void (U_EXPORT2 *UConverterToUCallback) (
                  const void* context,
                  UConverterToUnicodeArgs *args,
                  const char *codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode *pErrorCode);

/**
 * Function pointer for error callback in the unicode to codepage direction.
 * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
 * @param context Pointer to the callback's private data
 * @param args Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param pErrorCode    ICU error code in/out parameter.
 *                      For converter callback functions, set to a conversion error
 *                      before the call, and the callback may reset it to U_ZERO_ERROR.
 * @see ucnv_setFromUCallBack
 * @stable ICU 2.0
 */
typedef void (U_EXPORT2 *UConverterFromUCallback) (
                    const void* context,
                    UConverterFromUnicodeArgs *args,
                    const UChar* codeUnits,
                    int32_t length,
                    UChar32 codePoint,
                    UConverterCallbackReason reason,
                    UErrorCode *pErrorCode);

U_CDECL_END

/**
 * Character that separates converter names from options and options from each other.
 * @see ucnv_open
 * @stable ICU 2.0
 */
#define UCNV_OPTION_SEP_CHAR ','

/**
 * String version of UCNV_OPTION_SEP_CHAR.
 * @see ucnv_open
 * @stable ICU 2.0
 */
#define UCNV_OPTION_SEP_STRING ","

/**
 * Character that separates a converter option from its value.
 * @see ucnv_open
 * @stable ICU 2.0
 */
#define UCNV_VALUE_SEP_CHAR '='

/**
 * String version of UCNV_VALUE_SEP_CHAR.
 * @see ucnv_open
 * @stable ICU 2.0
 */
#define UCNV_VALUE_SEP_STRING "="

/**
 * Converter option for specifying a locale.
 * For example, ucnv_open("SCSU,locale=ja", &errorCode);
 * See convrtrs.txt.
 *
 * @see ucnv_open
 * @stable ICU 2.0
 */
#define UCNV_LOCALE_OPTION_STRING ",locale="

/**
 * Converter option for specifying a version selector (0..9) for some converters.
 * For example,
 * \code
 *   ucnv_open("UTF-7,version=1", &errorCode);
 * \endcode
 * See convrtrs.txt.
 *
 * @see ucnv_open
 * @stable ICU 2.4
 */
#define UCNV_VERSION_OPTION_STRING ",version="

/**
 * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
 * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
 * S/390 (z/OS) Unix System Services (Open Edition).
 * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
 * See convrtrs.txt.
 *
 * @see ucnv_open
 * @stable ICU 2.4
 */
#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"

/**
 * Do a fuzzy compare of two converter/alias names.
 * The comparison is case-insensitive, ignores leading zeroes if they are not
 * followed by further digits, and ignores all but letters and digits.
 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
 * at http://www.unicode.org/reports/tr22/
 *
 * @param name1 a converter name or alias, zero-terminated
 * @param name2 a converter name or alias, zero-terminated
 * @return 0 if the names match, or a negative value if the name1
 * lexically precedes name2, or a positive value if the name1
 * lexically follows name2.
 * @stable ICU 2.0
 */
U_CAPI int U_EXPORT2
ucnv_compareNames(const char *name1, const char *name2);


/**
 * Creates a UConverter object with the name of a coded character set specified as a C string.
 * The actual name will be resolved with the alias file
 * using a case-insensitive string comparison that ignores
 * leading zeroes and all non-alphanumeric characters.
 * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
 * (See also ucnv_compareNames().)
 * If <code>NULL</code> is passed for the converter name, it will create one with the
 * getDefaultName return value.
 *
 * <p>A converter name for ICU 1.5 and above may contain options
 * like a locale specification to control the specific behavior of
 * the newly instantiated converter.
 * The meaning of the options depends on the particular converter.
 * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
 *
 * <p>Options are appended to the converter name string, with a
 * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
 * also between adjacent options.</p>
 *
 * <p>If the alias is ambiguous, then the preferred converter is used
 * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
 *
 * <p>The conversion behavior and names can vary between platforms. ICU may
 * convert some characters differently from other platforms. Details on this topic
 * are in the <a href="https://unicode-org.github.io/icu/userguide/conversion/">User
 * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
 * other than its an alias starting with the letters "cp". Please do not
 * associate any meaning to these aliases.</p>
 *
 * \snippet samples/ucnv/convsamp.cpp ucnv_open
 *
 * @param converterName Name of the coded character set table.
 *          This may have options appended to the string.
 *          IANA alias character set names, IBM CCSIDs starting with "ibm-",
 *          Windows codepage numbers starting with "windows-" are frequently
 *          used for this parameter. See ucnv_getAvailableName and
 *          ucnv_getAlias for a complete list that is available.
 *          If this parameter is NULL, the default converter will be used.
 * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
 * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
 * @see ucnv_openU
 * @see ucnv_openCCSID
 * @see ucnv_getAvailableName
 * @see ucnv_getAlias
 * @see ucnv_getDefaultName
 * @see ucnv_close
 * @see ucnv_compareNames
 * @stable ICU 2.0
 */
U_CAPI UConverter* U_EXPORT2
ucnv_open(const char *converterName, UErrorCode *err);


/**
 * Creates a Unicode converter with the names specified as unicode string.
 * The name should be limited to the ASCII-7 alphanumerics range.
 * The actual name will be resolved with the alias file
 * using a case-insensitive string comparison that ignores
 * leading zeroes and all non-alphanumeric characters.
 * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
 * (See also ucnv_compareNames().)
 * If <TT>NULL</TT> is passed for the converter name, it will create
 * one with the ucnv_getDefaultName() return value.
 * If the alias is ambiguous, then the preferred converter is used
 * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
 *
 * <p>See ucnv_open for the complete details</p>
 * @param name Name of the UConverter table in a zero terminated
 *        Unicode string
 * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
 *        U_FILE_ACCESS_ERROR</TT>
 * @return the created Unicode converter object, or <TT>NULL</TT> if an
 *        error occurred
 * @see ucnv_open
 * @see ucnv_openCCSID
 * @see ucnv_close
 * @see ucnv_compareNames
 * @stable ICU 2.0
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openU(const UChar *name,
           UErrorCode *err);

/**
 * Creates a UConverter object from a CCSID number and platform pair.
 * Note that the usefulness of this function is limited to platforms with numeric
 * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
 * encodings.
 *
 * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
 * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
 * for some Unicode conversion tables there are multiple CCSIDs.
 * Some "alternate" Unicode conversion tables are provided by the
 * IBM CDRA conversion table registry.
 * The most prominent example of a systematic modification of conversion tables that is
 * not provided in the form of conversion table files in the repository is
 * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
 * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
 *
 * Only IBM default conversion tables are accessible with ucnv_openCCSID().
 * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
 * with that CCSID.
 *
 * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
 *
 * In summary, the use of CCSIDs and the associated API functions is not recommended.
 *
 * In order to open a converter with the default IBM CDRA Unicode conversion table,
 * you can use this function or use the prefix "ibm-":
 * \code
 *     char name[20];
 *     sprintf(name, "ibm-%hu", ccsid);
 *     cnv=ucnv_open(name, &errorCode);
 * \endcode
 *
 * In order to open a converter with the IBM S/390 Unix System Services variant
 * of a Unicode/EBCDIC conversion table,
 * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
 * \code
 *     char name[20];
 *     sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
 *     cnv=ucnv_open(name, &errorCode);
 * \endcode
 *
 * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
 * \code
 *     char name[20];
 *     sprintf(name, "cp%hu", codepageID);
 *     cnv=ucnv_open(name, &errorCode);
 * \endcode
 *
 * If the alias is ambiguous, then the preferred converter is used
 * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
 *
 * @param codepage codepage number to create
 * @param platform the platform in which the codepage number exists
 * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
 * @return the created Unicode converter object, or <TT>NULL</TT> if an error
 *   occurred.
 * @see ucnv_open
 * @see ucnv_openU
 * @see ucnv_close
 * @see ucnv_getCCSID
 * @see ucnv_getPlatform
 * @see UConverterPlatform
 * @stable ICU 2.0
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openCCSID(int32_t codepage,
               UConverterPlatform platform,
               UErrorCode * err);

/**
 * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
 *
 * <p>The packageName and converterName must point to an ICU udata object, as defined by
 *   <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
 * Typically, packageName will refer to a (.dat) file, or to a package registered with
 * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
 *
 * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
 * stored in the converter cache or the alias table. The only way to open further converters
 * is call this function multiple times, or use the ucnv_clone() function to clone a
 * 'primary' converter.</p>
 *
 * <p>A future version of ICU may add alias table lookups and/or caching
 * to this function.</p>
 *
 * <p>Example Use:
 *      <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
 * </p>
 *
 * @param packageName name of the package (equivalent to 'path' in udata_open() call)
 * @param converterName name of the data item to be used, without suffix.
 * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
 * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
 * @see udata_open
 * @see ucnv_open
 * @see ucnv_clone
 * @see ucnv_close
 * @stable ICU 2.2
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);

/**
 * Thread safe converter cloning operation.
 *
 * You must ucnv_close() the clone.
 *
 * @param cnv converter to be cloned
 * @param status to indicate whether the operation went on smoothly or there were errors
 * @return pointer to the new clone
 * @stable ICU 71
 */
U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *status);

#ifndef U_HIDE_DEPRECATED_API

/**
 * Thread safe converter cloning operation.
 * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
 * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
 * If the buffer size is sufficient, then the clone will use the stack buffer;
 * otherwise, it will be allocated, and *pBufferSize will indicate
 * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
 *
 * You must ucnv_close() the clone in any case.
 *
 * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
 * then *pBufferSize will be changed to a sufficient size
 * for cloning this converter,
 * without actually cloning the converter ("pure pre-flighting").
 *
 * If *pBufferSize is greater than zero but not large enough for a stack-based
 * clone, then the converter is cloned using newly allocated memory
 * and *pBufferSize is changed to the necessary size.
 *
 * If the converter clone fits into the stack buffer but the stack buffer is not
 * sufficiently aligned for the clone, then the clone will use an
 * adjusted pointer and use an accordingly smaller buffer size.
 *
 * @param cnv converter to be cloned
 * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
 *  user allocated space for the new clone. If NULL new memory will be allocated.
 *  If buffer is not large enough, new memory will be allocated.
 *  Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
 * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
 *  pointer to size of allocated space.
 * @param status to indicate whether the operation went on smoothly or there were errors
 *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
 *  is used if pBufferSize != NULL and any allocations were necessary
 *  However, it is better to check if *pBufferSize grew for checking for
 *  allocations because warning codes can be overridden by subsequent
 *  function calls.
 * @return pointer to the new clone
 * @deprecated ICU 71 Use ucnv_clone() instead.
 */
U_DEPRECATED UConverter * U_EXPORT2
ucnv_safeClone(const UConverter *cnv,
               void             *stackBuffer,
               int32_t          *pBufferSize,
               UErrorCode       *status);

/**
 * \def U_CNV_SAFECLONE_BUFFERSIZE
 * Definition of a buffer size that is designed to be large enough for
 * converters to be cloned with ucnv_safeClone().
 * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
 */
#define U_CNV_SAFECLONE_BUFFERSIZE  1024

#endif /* U_HIDE_DEPRECATED_API */

/**
 * Deletes the unicode converter and releases resources associated
 * with just this instance.
 * Does not free up shared converter tables.
 *
 * @param converter the converter object to be deleted
 * @see ucnv_open
 * @see ucnv_openU
 * @see ucnv_openCCSID
 * @stable ICU 2.0
 */
U_CAPI void  U_EXPORT2
ucnv_close(UConverter * converter);

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUConverterPointer
 * "Smart pointer" class, closes a UConverter via ucnv_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close);

U_NAMESPACE_END

#endif

/**
 * Fills in the output parameter, subChars, with the substitution characters
 * as multiple bytes.
 * If ucnv_setSubstString() set a Unicode string because the converter is
 * stateful, then subChars will be an empty string.
 *
 * @param converter the Unicode converter
 * @param subChars the substitution characters
 * @param len on input the capacity of subChars, on output the number
 * of bytes copied to it
 * @param  err the outgoing error status code.
 * If the substitution character array is too small, an
 * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
 * @see ucnv_setSubstString
 * @see ucnv_setSubstChars
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getSubstChars(const UConverter *converter,
                   char *subChars,
                   int8_t *len,
                   UErrorCode *err);

/**
 * Sets the substitution chars when converting from unicode to a codepage. The
 * substitution is specified as a string of 1-4 bytes, and may contain
 * <TT>NULL</TT> bytes.
 * The subChars must represent a single character. The caller needs to know the
 * byte sequence of a valid character in the converter's charset.
 * For some converters, for example some ISO 2022 variants, only single-byte
 * substitution characters may be supported.
 * The newer ucnv_setSubstString() function relaxes these limitations.
 *
 * @param converter the Unicode converter
 * @param subChars the substitution character byte sequence we want set
 * @param len the number of bytes in subChars
 * @param err the error status code.  <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
 * len is bigger than the maximum number of bytes allowed in subchars
 * @see ucnv_setSubstString
 * @see ucnv_getSubstChars
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_setSubstChars(UConverter *converter,
                   const char *subChars,
                   int8_t len,
                   UErrorCode *err);

/**
 * Set a substitution string for converting from Unicode to a charset.
 * The caller need not know the charset byte sequence for each charset.
 *
 * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
 * for a single character, this function takes a Unicode string with
 * zero, one or more characters, and immediately verifies that the string can be
 * converted to the charset.
 * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
 * then the function returns with an error accordingly.
 *
 * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
 * by converting on the fly at the point of substitution rather than setting
 * a fixed byte sequence.
 *
 * @param cnv The UConverter object.
 * @param s The Unicode string.
 * @param length The number of UChars in s, or -1 for a NUL-terminated string.
 * @param err Pointer to a standard ICU error code. Its input value must
 *            pass the U_SUCCESS() test, or else the function returns
 *            immediately. Check for U_FAILURE() on output or use with
 *            function chaining. (See User Guide for details.)
 *
 * @see ucnv_setSubstChars
 * @see ucnv_getSubstChars
 * @stable ICU 3.6
 */
U_CAPI void U_EXPORT2
ucnv_setSubstString(UConverter *cnv,
                    const UChar *s,
                    int32_t length,
                    UErrorCode *err);

/**
 * Fills in the output parameter, errBytes, with the error characters from the
 * last failing conversion.
 *
 * @param converter the Unicode converter
 * @param errBytes the codepage bytes which were in error
 * @param len on input the capacity of errBytes, on output the number of
 *  bytes which were copied to it
 * @param err the error status code.
 * If the substitution character array is too small, an
 * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getInvalidChars(const UConverter *converter,
                     char *errBytes,
                     int8_t *len,
                     UErrorCode *err);

/**
 * Fills in the output parameter, errChars, with the error characters from the
 * last failing conversion.
 *
 * @param converter the Unicode converter
 * @param errUChars the UChars which were in error
 * @param len on input the capacity of errUChars, on output the number of
 *  UChars which were copied to it
 * @param err the error status code.
 * If the substitution character array is too small, an
 * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getInvalidUChars(const UConverter *converter,
                      UChar *errUChars,
                      int8_t *len,
                      UErrorCode *err);

/**
 * Resets the state of a converter to the default state. This is used
 * in the case of an error, to restart a conversion from a known default state.
 * It will also empty the internal output buffers.
 * @param converter the Unicode converter
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_reset(UConverter *converter);

/**
 * Resets the to-Unicode part of a converter state to the default state.
 * This is used in the case of an error to restart a conversion to
 * Unicode to a known default state. It will also empty the internal
 * output buffers used for the conversion to Unicode codepoints.
 * @param converter the Unicode converter
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_resetToUnicode(UConverter *converter);

/**
 * Resets the from-Unicode part of a converter state to the default state.
 * This is used in the case of an error to restart a conversion from
 * Unicode to a known default state. It will also empty the internal output
 * buffers used for the conversion from Unicode codepoints.
 * @param converter the Unicode converter
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_resetFromUnicode(UConverter *converter);

/**
 * Returns the maximum number of bytes that are output per UChar in conversion
 * from Unicode using this converter.
 * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
 * to calculate the size of a target buffer for conversion from Unicode.
 *
 * Note: Before ICU 2.8, this function did not return reliable numbers for
 * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
 *
 * This number may not be the same as the maximum number of bytes per
 * "conversion unit". In other words, it may not be the intuitively expected
 * number of bytes per character that would be published for a charset,
 * and may not fulfill any other purpose than the allocation of an output
 * buffer of guaranteed sufficient size for a given input length and converter.
 *
 * Examples for special cases that are taken into account:
 * - Supplementary code points may convert to more bytes than BMP code points.
 *   This function returns bytes per UChar (UTF-16 code unit), not per
 *   Unicode code point, for efficient buffer allocation.
 * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
 * - When m input UChars are converted to n output bytes, then the maximum m/n
 *   is taken into account.
 *
 * The number returned here does not take into account
 * (see UCNV_GET_MAX_BYTES_FOR_STRING):
 * - callbacks which output more than one charset character sequence per call,
 *   like escape callbacks
 * - initial and final non-character bytes that are output by some converters
 *   (automatic BOMs, initial escape sequence, final SI, etc.)
 *
 * Examples for returned values:
 * - SBCS charsets: 1
 * - Shift-JIS: 2
 * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
 * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
 * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
 * - ISO-2022: 3 (always outputs UTF-8)
 * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
 * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
 *
 * @param converter The Unicode converter.
 * @return The maximum number of bytes per UChar (16 bit code unit)
 *    that are output by ucnv_fromUnicode(),
 *    to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
 *    for buffer allocation.
 *
 * @see UCNV_GET_MAX_BYTES_FOR_STRING
 * @see ucnv_getMinCharSize
 * @stable ICU 2.0
 */
U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize(const UConverter *converter);

/**
 * Calculates the size of a buffer for conversion from Unicode to a charset.
 * The calculated size is guaranteed to be sufficient for this conversion.
 *
 * It takes into account initial and final non-character bytes that are output
 * by some converters.
 * It does not take into account callbacks which output more than one charset
 * character sequence per call, like escape callbacks.
 * The default (substitution) callback only outputs one charset character sequence.
 *
 * @param length Number of UChars to be converted.
 * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
 *                    that will be used.
 * @return Size of a buffer that will be large enough to hold the output bytes of
 *         converting length UChars with the converter that returned the maxCharSize.
 *
 * @see ucnv_getMaxCharSize
 * @stable ICU 2.8
 */
#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
     (((int32_t)(length)+10)*(int32_t)(maxCharSize))

/**
 * Returns the minimum byte length (per codepoint) for characters in this codepage.
 * This is usually either 1 or 2.
 * @param converter the Unicode converter
 * @return the minimum number of bytes per codepoint allowed by this particular converter
 * @see ucnv_getMaxCharSize
 * @stable ICU 2.0
 */
U_CAPI int8_t U_EXPORT2
ucnv_getMinCharSize(const UConverter *converter);

/**
 * Returns the display name of the converter passed in based on the Locale
 * passed in. If the locale contains no display name, the internal ASCII
 * name will be filled in.
 *
 * @param converter the Unicode converter.
 * @param displayLocale is the specific Locale we want to localized for
 * @param displayName user provided buffer to be filled in
 * @param displayNameCapacity size of displayName Buffer
 * @param err error status code
 * @return displayNameLength number of UChar needed in displayName
 * @see ucnv_getName
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_getDisplayName(const UConverter *converter,
                    const char *displayLocale,
                    UChar *displayName,
                    int32_t displayNameCapacity,
                    UErrorCode *err);

/**
 * Gets the internal, canonical name of the converter (zero-terminated).
 * The lifetime of the returned string will be that of the converter
 * passed to this function.
 * @param converter the Unicode converter
 * @param err UErrorCode status
 * @return the internal name of the converter
 * @see ucnv_getDisplayName
 * @stable ICU 2.0
 */
U_CAPI const char * U_EXPORT2
ucnv_getName(const UConverter *converter, UErrorCode *err);

/**
 * Gets a codepage number associated with the converter. This is not guaranteed
 * to be the one used to create the converter. Some converters do not represent
 * platform registered codepages and return zero for the codepage number.
 * The error code fill-in parameter indicates if the codepage number
 * is available.
 * Does not check if the converter is <TT>NULL</TT> or if converter's data
 * table is <TT>NULL</TT>.
 *
 * Important: The use of CCSIDs is not recommended because it is limited
 * to only two platforms in principle and only one (UCNV_IBM) in the current
 * ICU converter API.
 * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
 * For more details see ucnv_openCCSID().
 *
 * @param converter the Unicode converter
 * @param err the error status code.
 * @return If any error occurs, -1 will be returned otherwise, the codepage number
 * will be returned
 * @see ucnv_openCCSID
 * @see ucnv_getPlatform
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_getCCSID(const UConverter *converter,
              UErrorCode *err);

/**
 * Gets a codepage platform associated with the converter. Currently,
 * only <TT>UCNV_IBM</TT> will be returned.
 * Does not test if the converter is <TT>NULL</TT> or if converter's data
 * table is <TT>NULL</TT>.
 * @param converter the Unicode converter
 * @param err the error status code.
 * @return The codepage platform
 * @stable ICU 2.0
 */
U_CAPI UConverterPlatform U_EXPORT2
ucnv_getPlatform(const UConverter *converter,
                 UErrorCode *err);

/**
 * Gets the type of the converter
 * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
 * EBCDIC_STATEFUL, LATIN_1
 * @param converter a valid, opened converter
 * @return the type of the converter
 * @stable ICU 2.0
 */
U_CAPI UConverterType U_EXPORT2
ucnv_getType(const UConverter * converter);

/**
 * Gets the "starter" (lead) bytes for converters of type MBCS.
 * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
 * is not MBCS. Fills in an array of type UBool, with the value of the byte
 * as offset to the array. For example, if (starters[0x20] == true) at return,
 * it means that the byte 0x20 is a starter byte in this converter.
 * Context pointers are always owned by the caller.
 *
 * @param converter a valid, opened converter of type MBCS
 * @param starters an array of size 256 to be filled in
 * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
 * converter is not a type which can return starters.
 * @see ucnv_getType
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getStarters(const UConverter* converter,
                 UBool starters[256],
                 UErrorCode* err);


/**
 * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
 * @see ucnv_getUnicodeSet
 * @stable ICU 2.6
 */
typedef enum UConverterUnicodeSet {
    /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
    UCNV_ROUNDTRIP_SET,
    /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */
    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * Number of UConverterUnicodeSet selectors.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCNV_SET_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UConverterUnicodeSet;


/**
 * Returns the set of Unicode code points that can be converted by an ICU converter.
 *
 * Returns one of several kinds of set:
 *
 * 1. UCNV_ROUNDTRIP_SET
 *
 * The set of all Unicode code points that can be roundtrip-converted
 * (converted without any data loss) with the converter (ucnv_fromUnicode()).
 * This set will not include code points that have fallback mappings
 * or are only the result of reverse fallback mappings.
 * This set will also not include PUA code points with fallbacks, although
 * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
 * See UTR #22 "Character Mapping Markup Language"
 * at http://www.unicode.org/reports/tr22/
 *
 * This is useful for example for
 * - checking that a string or document can be roundtrip-converted with a converter,
 *   without/before actually performing the conversion
 * - testing if a converter can be used for text for typical text for a certain locale,
 *   by comparing its roundtrip set with the set of ExemplarCharacters from
 *   ICU's locale data or other sources
 *
 * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
 *
 * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
 * when fallbacks are turned on (see ucnv_setFallback()).
 * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
 *
 * In the future, there may be more UConverterUnicodeSet choices to select
 * sets with different properties.
 *
 * @param cnv The converter for which a set is requested.
 * @param setFillIn A valid USet *. It will be cleared by this function before
 *            the converter's specific set is filled into the USet.
 * @param whichSet A UConverterUnicodeSet selector;
 *              currently UCNV_ROUNDTRIP_SET is the only supported value.
 * @param pErrorCode ICU error code in/out parameter.
 *                   Must fulfill U_SUCCESS before the function call.
 *
 * @see UConverterUnicodeSet
 * @see uset_open
 * @see uset_close
 * @stable ICU 2.6
 */
U_CAPI void U_EXPORT2
ucnv_getUnicodeSet(const UConverter *cnv,
                   USet *setFillIn,
                   UConverterUnicodeSet whichSet,
                   UErrorCode *pErrorCode);

/**
 * Gets the current callback function used by the converter when an illegal
 *  or invalid codepage sequence is found.
 * Context pointers are always owned by the caller.
 *
 * @param converter the unicode converter
 * @param action fillin: returns the callback function pointer
 * @param context fillin: returns the callback's private void* context
 * @see ucnv_setToUCallBack
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter,
                     UConverterToUCallback *action,
                     const void **context);

/**
 * Gets the current callback function used by the converter when illegal
 * or invalid Unicode sequence is found.
 * Context pointers are always owned by the caller.
 *
 * @param converter the unicode converter
 * @param action fillin: returns the callback function pointer
 * @param context fillin: returns the callback's private void* context
 * @see ucnv_setFromUCallBack
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getFromUCallBack (const UConverter * converter,
                       UConverterFromUCallback *action,
                       const void **context);

/**
 * Changes the callback function used by the converter when
 * an illegal or invalid sequence is found.
 * Context pointers are always owned by the caller.
 * Predefined actions and contexts can be found in the ucnv_err.h header.
 *
 * @param converter the unicode converter
 * @param newAction the new callback function
 * @param newContext the new toUnicode callback context pointer. This can be NULL.
 * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
 * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
 * @param err The error code status
 * @see ucnv_getToUCallBack
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_setToUCallBack (UConverter * converter,
                     UConverterToUCallback newAction,
                     const void* newContext,
                     UConverterToUCallback *oldAction,
                     const void** oldContext,
                     UErrorCode * err);

/**
 * Changes the current callback function used by the converter when
 * an illegal or invalid sequence is found.
 * Context pointers are always owned by the caller.
 * Predefined actions and contexts can be found in the ucnv_err.h header.
 *
 * @param converter the unicode converter
 * @param newAction the new callback function
 * @param newContext the new fromUnicode callback context pointer. This can be NULL.
 * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
 * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
 * @param err The error code status
 * @see ucnv_getFromUCallBack
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_setFromUCallBack (UConverter * converter,
                       UConverterFromUCallback newAction,
                       const void *newContext,
                       UConverterFromUCallback *oldAction,
                       const void **oldContext,
                       UErrorCode * err);

/**
 * Converts an array of unicode characters to an array of codepage
 * characters. This function is optimized for converting a continuous
 * stream of data in buffer-sized chunks, where the entire source and
 * target does not fit in available buffers.
 *
 * The source pointer is an in/out parameter. It starts out pointing where the
 * conversion is to begin, and ends up pointing after the last UChar consumed.
 *
 * Target similarly starts out pointer at the first available byte in the output
 * buffer, and ends up pointing after the last byte written to the output.
 *
 * The converter always attempts to consume the entire source buffer, unless
 * (1.) the target buffer is full, or (2.) a failing error is returned from the
 * current callback function.  When a successful error status has been
 * returned, it means that all of the source buffer has been
 *  consumed. At that point, the caller should reset the source and
 *  sourceLimit pointers to point to the next chunk.
 *
 * At the end of the stream (flush==true), the input is completely consumed
 * when *source==sourceLimit and no error code is set.
 * The converter object is then automatically reset by this function.
 * (This means that a converter need not be reset explicitly between data
 * streams if it finishes the previous stream without errors.)
 *
 * This is a <I>stateful</I> conversion. Additionally, even when all source data has
 * been consumed, some data may be in the converters' internal state.
 * Call this function repeatedly, updating the target pointers with
 * the next empty chunk of target in case of a
 * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
 *  with the next chunk of source when a successful error status is
 * returned, until there are no more chunks of source data.
 * @param converter the Unicode converter
 * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
 *  codepage characters to. Output : points to after the last codepage character copied
 *  to <TT>target</TT>.
 * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
 * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
 * @param sourceLimit the pointer just after the last of the source buffer
 * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
 * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
 * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
 * For output data carried across calls, and other data without a specific source character
 * (such as from escape sequences or callbacks)  -1 will be placed for offsets.
 * @param flush set to <TT>true</TT> if the current source buffer is the last available
 * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
 * this function may have to be called multiple times with flush set to <TT>true</TT> until
 * the source buffer is consumed.
 * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
 * converter is <TT>NULL</TT>.
 * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
 * still data to be written to the target.
 * @see ucnv_fromUChars
 * @see ucnv_convert
 * @see ucnv_getMinCharSize
 * @see ucnv_setToUCallBack
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_fromUnicode (UConverter * converter,
                  char **target,
                  const char *targetLimit,
                  const UChar ** source,
                  const UChar * sourceLimit,
                  int32_t* offsets,
                  UBool flush,
                  UErrorCode * err);

/**
 * Converts a buffer of codepage bytes into an array of unicode UChars
 * characters. This function is optimized for converting a continuous
 * stream of data in buffer-sized chunks, where the entire source and
 * target does not fit in available buffers.
 *
 * The source pointer is an in/out parameter. It starts out pointing where the
 * conversion is to begin, and ends up pointing after the last byte of source consumed.
 *
 * Target similarly starts out pointer at the first available UChar in the output
 * buffer, and ends up pointing after the last UChar written to the output.
 * It does NOT necessarily keep UChar sequences together.
 *
 * The converter always attempts to consume the entire source buffer, unless
 * (1.) the target buffer is full, or (2.) a failing error is returned from the
 * current callback function.  When a successful error status has been
 * returned, it means that all of the source buffer has been
 *  consumed. At that point, the caller should reset the source and
 *  sourceLimit pointers to point to the next chunk.
 *
 * At the end of the stream (flush==true), the input is completely consumed
 * when *source==sourceLimit and no error code is set
 * The converter object is then automatically reset by this function.
 * (This means that a converter need not be reset explicitly between data
 * streams if it finishes the previous stream without errors.)
 *
 * This is a <I>stateful</I> conversion. Additionally, even when all source data has
 * been consumed, some data may be in the converters' internal state.
 * Call this function repeatedly, updating the target pointers with
 * the next empty chunk of target in case of a
 * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
 *  with the next chunk of source when a successful error status is
 * returned, until there are no more chunks of source data.
 * @param converter the Unicode converter
 * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
 *  UChars into. Output : points to after the last UChar copied.
 * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
 * @param source I/O parameter, pointer to pointer to the source codepage buffer.
 * @param sourceLimit the pointer to the byte after the end of the source buffer
 * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
 * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
 * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
 * For output data carried across calls, and other data without a specific source character
 * (such as from escape sequences or callbacks)  -1 will be placed for offsets.
 * @param flush set to <TT>true</TT> if the current source buffer is the last available
 * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
 * this function may have to be called multiple times with flush set to <TT>true</TT> until
 * the source buffer is consumed.
 * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
 * converter is <TT>NULL</TT>.
 * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
 * still data to be written to the target.
 * @see ucnv_fromUChars
 * @see ucnv_convert
 * @see ucnv_getMinCharSize
 * @see ucnv_setFromUCallBack
 * @see ucnv_getNextUChar
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_toUnicode(UConverter *converter,
               UChar **target,
               const UChar *targetLimit,
               const char **source,
               const char *sourceLimit,
               int32_t *offsets,
               UBool flush,
               UErrorCode *err);

/**
 * Convert the Unicode string into a codepage string using an existing UConverter.
 * The output string is NUL-terminated if possible.
 *
 * This function is a more convenient but less powerful version of ucnv_fromUnicode().
 * It is only useful for whole strings, not for streaming conversion.
 *
 * The maximum output buffer capacity required (barring output from callbacks) will be
 * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
 *
 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
 * @param src the input Unicode string
 * @param srcLength the input string length, or -1 if NUL-terminated
 * @param dest destination string buffer, can be NULL if destCapacity==0
 * @param destCapacity the number of chars available at dest
 * @param pErrorCode normal ICU error code;
 *                  common error codes that may be set by this function include
 *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
 *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
 * @return the length of the output string, not counting the terminating NUL;
 *         if the length is greater than destCapacity, then the string will not fit
 *         and a buffer of the indicated length would need to be passed in
 * @see ucnv_fromUnicode
 * @see ucnv_convert
 * @see UCNV_GET_MAX_BYTES_FOR_STRING
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter *cnv,
                char *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
                UErrorCode *pErrorCode);

/**
 * Convert the codepage string into a Unicode string using an existing UConverter.
 * The output string is NUL-terminated if possible.
 *
 * This function is a more convenient but less powerful version of ucnv_toUnicode().
 * It is only useful for whole strings, not for streaming conversion.
 *
 * The maximum output buffer capacity required (barring output from callbacks) will be
 * 2*srcLength (each char may be converted into a surrogate pair).
 *
 * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
 * @param src the input codepage string
 * @param srcLength the input string length, or -1 if NUL-terminated
 * @param dest destination string buffer, can be NULL if destCapacity==0
 * @param destCapacity the number of UChars available at dest
 * @param pErrorCode normal ICU error code;
 *                  common error codes that may be set by this function include
 *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
 *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
 * @return the length of the output string, not counting the terminating NUL;
 *         if the length is greater than destCapacity, then the string will not fit
 *         and a buffer of the indicated length would need to be passed in
 * @see ucnv_toUnicode
 * @see ucnv_convert
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_toUChars(UConverter *cnv,
              UChar *dest, int32_t destCapacity,
              const char *src, int32_t srcLength,
              UErrorCode *pErrorCode);

/**
 * Convert a codepage buffer into Unicode one character at a time.
 * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
 *
 * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
 * - Faster for small amounts of data, for most converters, e.g.,
 *   US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
 *   (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
 *    it uses ucnv_toUnicode() internally.)
 * - Convenient.
 *
 * Limitations compared to ucnv_toUnicode():
 * - Always assumes flush=true.
 *   This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
 *   that is, for where the input is supplied in multiple buffers,
 *   because ucnv_getNextUChar() will assume the end of the input at the end
 *   of the first buffer.
 * - Does not provide offset output.
 *
 * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
 * ucnv_getNextUChar() uses the current state of the converter
 * (unlike ucnv_toUChars() which always resets first).
 * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
 * stopped in the middle of a character sequence (with flush=false),
 * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
 * internally until the next character boundary.
 * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
 * start at a character boundary.)
 *
 * Instead of using ucnv_getNextUChar(), it is recommended
 * to convert using ucnv_toUnicode() or ucnv_toUChars()
 * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
 * or a C++ CharacterIterator or similar.
 * This allows streaming conversion and offset output, for example.
 *
 * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
 * There are two different kinds of codepages that provide mappings for surrogate characters:
 * <ul>
 *   <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
 *       code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
 *       Each valid sequence will result in exactly one returned code point.
 *       If a sequence results in a single surrogate, then that will be returned
 *       by itself, even if a neighboring sequence encodes the matching surrogate.</li>
 *   <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
 *       including surrogates. Code points in supplementary planes are represented with
 *       two sequences, each encoding a surrogate.
 *       For these codepages, matching pairs of surrogates will be combined into single
 *       code points for returning from this function.
 *       (Note that SCSU is actually a mix of these codepage types.)</li>
 * </ul></p>
 *
 * @param converter an open UConverter
 * @param source the address of a pointer to the codepage buffer, will be
 *  updated to point after the bytes consumed in the conversion call.
 * @param sourceLimit points to the end of the input buffer
 * @param err fills in error status (see ucnv_toUnicode)
 * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
 * is empty or does not convert to any output (e.g.: pure state-change
 * codes SI/SO, escape sequences for ISO 2022,
 * or if the callback did not output anything, ...).
 * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
 *  the "buffer" is the return code. However, there might be subsequent output
 *  stored in the converter object
 * that will be returned in following calls to this function.
 * @return a UChar32 resulting from the partial conversion of source
 * @see ucnv_toUnicode
 * @see ucnv_toUChars
 * @see ucnv_convert
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter * converter,
                  const char **source,
                  const char * sourceLimit,
                  UErrorCode * err);

/**
 * Convert from one external charset to another using two existing UConverters.
 * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
 * are used, "pivoting" through 16-bit Unicode.
 *
 * Important: For streaming conversion (multiple function calls for successive
 * parts of a text stream), the caller must provide a pivot buffer explicitly,
 * and must preserve the pivot buffer and associated pointers from one
 * call to another. (The buffer may be moved if its contents and the relative
 * pointer positions are preserved.)
 *
 * There is a similar function, ucnv_convert(),
 * which has the following limitations:
 * - it takes charset names, not converter objects, so that
 *   - two converters are opened for each call
 *   - only single-string conversion is possible, not streaming operation
 * - it does not provide enough information to find out,
 *   in case of failure, whether the toUnicode or
 *   the fromUnicode conversion failed
 *
 * By contrast, ucnv_convertEx()
 * - takes UConverter parameters instead of charset names
 * - fully exposes the pivot buffer for streaming conversion and complete error handling
 *
 * ucnv_convertEx() also provides further convenience:
 * - an option to reset the converters at the beginning
 *   (if reset==true, see parameters;
 *    also sets *pivotTarget=*pivotSource=pivotStart)
 * - allow NUL-terminated input
 *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
 *   (if sourceLimit==NULL, see parameters)
 * - terminate with a NUL on output
 *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
 *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
 *   the target buffer
 * - the pivot buffer can be provided internally;
 *   possible only for whole-string conversion, not streaming conversion;
 *   in this case, the caller will not be able to get details about where an
 *   error occurred
 *   (if pivotStart==NULL, see below)
 *
 * The function returns when one of the following is true:
 * - the entire source text has been converted successfully to the target buffer
 * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
 * - a conversion error occurred
 *   (other U_FAILURE(), see description of pErrorCode)
 *
 * Limitation compared to the direct use of
 * ucnv_fromUnicode() and ucnv_toUnicode():
 * ucnv_convertEx() does not provide offset information.
 *
 * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
 * ucnv_convertEx() does not support preflighting directly.
 *
 * Sample code for converting a single string from
 * one external charset to UTF-8, ignoring the location of errors:
 *
 * \code
 * int32_t
 * myToUTF8(UConverter *cnv,
 *          const char *s, int32_t length,
 *          char *u8, int32_t capacity,
 *          UErrorCode *pErrorCode) {
 *     UConverter *utf8Cnv;
 *     char *target;
 *
 *     if(U_FAILURE(*pErrorCode)) {
 *         return 0;
 *     }
 *
 *     utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
 *     if(U_FAILURE(*pErrorCode)) {
 *         return 0;
 *     }
 *
 *     if(length<0) {
 *         length=strlen(s);
 *     }
 *     target=u8;
 *     ucnv_convertEx(utf8Cnv, cnv,
 *                    &target, u8+capacity,
 *                    &s, s+length,
 *                    NULL, NULL, NULL, NULL,
 *                    true, true,
 *                    pErrorCode);
 *
 *     myReleaseCachedUTF8Converter(utf8Cnv);
 *
 *     // return the output string length, but without preflighting
 *     return (int32_t)(target-u8);
 * }
 * \endcode
 *
 * @param targetCnv     Output converter, used to convert from the UTF-16 pivot
 *                      to the target using ucnv_fromUnicode().
 * @param sourceCnv     Input converter, used to convert from the source to
 *                      the UTF-16 pivot using ucnv_toUnicode().
 * @param target        I/O parameter, same as for ucnv_fromUChars().
 *                      Input: *target points to the beginning of the target buffer.
 *                      Output: *target points to the first unit after the last char written.
 * @param targetLimit   Pointer to the first unit after the target buffer.
 * @param source        I/O parameter, same as for ucnv_toUChars().
 *                      Input: *source points to the beginning of the source buffer.
 *                      Output: *source points to the first unit after the last char read.
 * @param sourceLimit   Pointer to the first unit after the source buffer.
 * @param pivotStart    Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
 *                      then an internal buffer is used and the other pivot
 *                      arguments are ignored and can be NULL as well.
 * @param pivotSource   I/O parameter, same as source in ucnv_fromUChars() for
 *                      conversion from the pivot buffer to the target buffer.
 * @param pivotTarget   I/O parameter, same as target in ucnv_toUChars() for
 *                      conversion from the source buffer to the pivot buffer.
 *                      It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
 *                      and pivotStart<pivotLimit (unless pivotStart==NULL).
 * @param pivotLimit    Pointer to the first unit after the pivot buffer.
 * @param reset         If true, then ucnv_resetToUnicode(sourceCnv) and
 *                      ucnv_resetFromUnicode(targetCnv) are called, and the
 *                      pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
 * @param flush         If true, indicates the end of the input.
 *                      Passed directly to ucnv_toUnicode(), and carried over to
 *                      ucnv_fromUnicode() when the source is empty as well.
 * @param pErrorCode    ICU error code in/out parameter.
 *                      Must fulfill U_SUCCESS before the function call.
 *                      U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
 *                      because overflows into the pivot buffer are handled internally.
 *                      Other conversion errors are from the source-to-pivot
 *                      conversion if *pivotSource==pivotStart, otherwise from
 *                      the pivot-to-target conversion.
 *
 * @see ucnv_convert
 * @see ucnv_fromAlgorithmic
 * @see ucnv_toAlgorithmic
 * @see ucnv_fromUnicode
 * @see ucnv_toUnicode
 * @see ucnv_fromUChars
 * @see ucnv_toUChars
 * @stable ICU 2.6
 */
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode);

/**
 * Convert from one external charset to another.
 * Internally, two converters are opened according to the name arguments,
 * then the text is converted to and from the 16-bit Unicode "pivot"
 * using ucnv_convertEx(), then the converters are closed again.
 *
 * This is a convenience function, not an efficient way to convert a lot of text:
 * ucnv_convert()
 * - takes charset names, not converter objects, so that
 *   - two converters are opened for each call
 *   - only single-string conversion is possible, not streaming operation
 * - does not provide enough information to find out,
 *   in case of failure, whether the toUnicode or
 *   the fromUnicode conversion failed
 * - allows NUL-terminated input
 *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
 *   (if sourceLength==-1, see parameters)
 * - terminate with a NUL on output
 *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
 *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
 *   the target buffer
 * - a pivot buffer is provided internally
 *
 * The function returns when one of the following is true:
 * - the entire source text has been converted successfully to the target buffer
 *   and either the target buffer is terminated with a single NUL byte
 *   or the error code is set to U_STRING_NOT_TERMINATED_WARNING
 * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
 *   and the full output string length is returned ("preflighting")
 * - a conversion error occurred
 *   (other U_FAILURE(), see description of pErrorCode)
 *
 * @param toConverterName   The name of the converter that is used to convert
 *                          from the UTF-16 pivot buffer to the target.
 * @param fromConverterName The name of the converter that is used to convert
 *                          from the source to the UTF-16 pivot buffer.
 * @param target            Pointer to the output buffer.
 * @param targetCapacity    Capacity of the target, in bytes.
 * @param source            Pointer to the input buffer.
 * @param sourceLength      Length of the input text, in bytes, or -1 for NUL-terminated input.
 * @param pErrorCode        ICU error code in/out parameter.
 *                          Must fulfill U_SUCCESS before the function call.
 * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
 *         and a U_BUFFER_OVERFLOW_ERROR is set.
 *
 * @see ucnv_convertEx
 * @see ucnv_fromAlgorithmic
 * @see ucnv_toAlgorithmic
 * @see ucnv_fromUnicode
 * @see ucnv_toUnicode
 * @see ucnv_fromUChars
 * @see ucnv_toUChars
 * @see ucnv_getNextUChar
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_convert(const char *toConverterName,
             const char *fromConverterName,
             char *target,
             int32_t targetCapacity,
             const char *source,
             int32_t sourceLength,
             UErrorCode *pErrorCode);

/**
 * Convert from one external charset to another.
 * Internally, the text is converted to and from the 16-bit Unicode "pivot"
 * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
 * except that the two converters need not be looked up and opened completely.
 *
 * The source-to-pivot conversion uses the cnv converter parameter.
 * The pivot-to-target conversion uses a purely algorithmic converter
 * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
 *
 * Internally, the algorithmic converter is opened and closed for each
 * function call, which is more efficient than using the public ucnv_open()
 * but somewhat less efficient than only resetting an existing converter
 * and using ucnv_convertEx().
 *
 * This function is more convenient than ucnv_convertEx() for single-string
 * conversions, especially when "preflighting" is desired (returning the length
 * of the complete output even if it does not fit into the target buffer;
 * see the User Guide Strings chapter). See ucnv_convert() for details.
 *
 * @param algorithmicType   UConverterType constant identifying the desired target
 *                          charset as a purely algorithmic converter.
 *                          Those are converters for Unicode charsets like
 *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
 *                          as well as US-ASCII and ISO-8859-1.
 * @param cnv               The converter that is used to convert
 *                          from the source to the UTF-16 pivot buffer.
 * @param target            Pointer to the output buffer.
 * @param targetCapacity    Capacity of the target, in bytes.
 * @param source            Pointer to the input buffer.
 * @param sourceLength      Length of the input text, in bytes
 * @param pErrorCode        ICU error code in/out parameter.
 *                          Must fulfill U_SUCCESS before the function call.
 * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
 *         and a U_BUFFER_OVERFLOW_ERROR is set.
 *
 * @see ucnv_fromAlgorithmic
 * @see ucnv_convert
 * @see ucnv_convertEx
 * @see ucnv_fromUnicode
 * @see ucnv_toUnicode
 * @see ucnv_fromUChars
 * @see ucnv_toUChars
 * @stable ICU 2.6
 */
U_CAPI int32_t U_EXPORT2
ucnv_toAlgorithmic(UConverterType algorithmicType,
                   UConverter *cnv,
                   char *target, int32_t targetCapacity,
                   const char *source, int32_t sourceLength,
                   UErrorCode *pErrorCode);

/**
 * Convert from one external charset to another.
 * Internally, the text is converted to and from the 16-bit Unicode "pivot"
 * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
 * except that the two converters need not be looked up and opened completely.
 *
 * The source-to-pivot conversion uses a purely algorithmic converter
 * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
 * The pivot-to-target conversion uses the cnv converter parameter.
 *
 * Internally, the algorithmic converter is opened and closed for each
 * function call, which is more efficient than using the public ucnv_open()
 * but somewhat less efficient than only resetting an existing converter
 * and using ucnv_convertEx().
 *
 * This function is more convenient than ucnv_convertEx() for single-string
 * conversions, especially when "preflighting" is desired (returning the length
 * of the complete output even if it does not fit into the target buffer;
 * see the User Guide Strings chapter). See ucnv_convert() for details.
 *
 * @param cnv               The converter that is used to convert
 *                          from the UTF-16 pivot buffer to the target.
 * @param algorithmicType   UConverterType constant identifying the desired source
 *                          charset as a purely algorithmic converter.
 *                          Those are converters for Unicode charsets like
 *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
 *                          as well as US-ASCII and ISO-8859-1.
 * @param target            Pointer to the output buffer.
 * @param targetCapacity    Capacity of the target, in bytes.
 * @param source            Pointer to the input buffer.
 * @param sourceLength      Length of the input text, in bytes
 * @param pErrorCode        ICU error code in/out parameter.
 *                          Must fulfill U_SUCCESS before the function call.
 * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
 *         and a U_BUFFER_OVERFLOW_ERROR is set.
 *
 * @see ucnv_fromAlgorithmic
 * @see ucnv_convert
 * @see ucnv_convertEx
 * @see ucnv_fromUnicode
 * @see ucnv_toUnicode
 * @see ucnv_fromUChars
 * @see ucnv_toUChars
 * @stable ICU 2.6
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromAlgorithmic(UConverter *cnv,
                     UConverterType algorithmicType,
                     char *target, int32_t targetCapacity,
                     const char *source, int32_t sourceLength,
                     UErrorCode *pErrorCode);

/**
 * Frees up memory occupied by unused, cached converter shared data.
 *
 * @return the number of cached converters successfully deleted
 * @see ucnv_close
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_flushCache(void);

/**
 * Returns the number of available converters, as per the alias file.
 *
 * @return the number of available converters
 * @see ucnv_getAvailableName
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
ucnv_countAvailable(void);

/**
 * Gets the canonical converter name of the specified converter from a list of
 * all available converters contained in the alias file. All converters
 * in this list can be opened.
 *
 * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>)
 * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
 * @see ucnv_countAvailable
 * @stable ICU 2.0
 */
U_CAPI const char* U_EXPORT2
ucnv_getAvailableName(int32_t n);

/**
 * Returns a UEnumeration to enumerate all of the canonical converter
 * names, as per the alias file, regardless of the ability to open each
 * converter.
 *
 * @return A UEnumeration object for getting all the recognized canonical
 *   converter names.
 * @see ucnv_getAvailableName
 * @see uenum_close
 * @see uenum_next
 * @stable ICU 2.4
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode *pErrorCode);

/**
 * Gives the number of aliases for a given converter or alias name.
 * If the alias is ambiguous, then the preferred converter is used
 * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
 * This method only enumerates the listed entries in the alias file.
 * @param alias alias name
 * @param pErrorCode error status
 * @return number of names on alias list for given alias
 * @stable ICU 2.0
 */
U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);

/**
 * Gives the name of the alias at given index of alias list.
 * This method only enumerates the listed entries in the alias file.
 * If the alias is ambiguous, then the preferred converter is used
 * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
 * @param alias alias name
 * @param n index in alias list
 * @param pErrorCode result of operation
 * @return returns the name of the alias at given index
 * @see ucnv_countAliases
 * @stable ICU 2.0
 */
U_CAPI const char * U_EXPORT2
ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);

/**
 * Fill-up the list of alias names for the given alias.
 * This method only enumerates the listed entries in the alias file.
 * If the alias is ambiguous, then the preferred converter is used
 * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
 * @param alias alias name
 * @param aliases fill-in list, aliases is a pointer to an array of
 *        <code>ucnv_countAliases()</code> string-pointers
 *        (<code>const char *</code>) that will be filled in.
 *        The strings themselves are owned by the library.
 * @param pErrorCode result of operation
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);

/**
 * Return a new UEnumeration object for enumerating all the
 * alias names for a given converter that are recognized by a standard.
 * This method only enumerates the listed entries in the alias file.
 * The convrtrs.txt file can be modified to change the results of
 * this function.
 * The first result in this list is the same result given by
 * <code>ucnv_getStandardName</code>, which is the default alias for
 * the specified standard name. The returned object must be closed with
 * <code>uenum_close</code> when you are done with the object.
 *
 * @param convName original converter name
 * @param standard name of the standard governing the names; MIME and IANA
 *      are such standards
 * @param pErrorCode The error code
 * @return A UEnumeration object for getting all aliases that are recognized
 *      by a standard. If any of the parameters are invalid, NULL
 *      is returned.
 * @see ucnv_getStandardName
 * @see uenum_close
 * @see uenum_next
 * @stable ICU 2.2
 */
U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char *convName,
                       const char *standard,
                       UErrorCode *pErrorCode);

/**
 * Gives the number of standards associated to converter names.
 * @return number of standards
 * @stable ICU 2.0
 */
U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void);

/**
 * Gives the name of the standard at given index of standard list.
 * @param n index in standard list
 * @param pErrorCode result of operation
 * @return returns the name of the standard at given index. Owned by the library.
 * @stable ICU 2.0
 */
U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);

/**
 * Returns a standard name for a given converter name.
 * <p>
 * Example alias table:<br>
 * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
 * <p>
 * Result of ucnv_getStandardName("conv", "STANDARD1") from example
 * alias table:<br>
 * <b>"alias2"</b>
 *
 * @param name original converter name
 * @param standard name of the standard governing the names; MIME and IANA
 *        are such standards
 * @param pErrorCode result of operation
 * @return returns the standard converter name;
 *         if a standard converter name cannot be determined,
 *         then <code>NULL</code> is returned. Owned by the library.
 * @stable ICU 2.0
 */
U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);

/**
 * This function will return the internal canonical converter name of the
 * tagged alias. This is the opposite of ucnv_openStandardNames, which
 * returns the tagged alias given the canonical name.
 * <p>
 * Example alias table:<br>
 * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
 * <p>
 * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
 * alias table:<br>
 * <b>"conv"</b>
 *
 * @return returns the canonical converter name;
 *         if a standard or alias name cannot be determined,
 *         then <code>NULL</code> is returned. The returned string is
 *         owned by the library.
 * @see ucnv_getStandardName
 * @stable ICU 2.4
 */
U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);

/**
 * Returns the current default converter name. If you want to open
 * a default converter, you do not need to use this function.
 * It is faster if you pass a NULL argument to ucnv_open the
 * default converter.
 *
 * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
 * always returns "UTF-8".
 *
 * @return returns the current default converter name.
 *         Storage owned by the library
 * @see ucnv_setDefaultName
 * @stable ICU 2.0
 */
U_CAPI const char * U_EXPORT2
ucnv_getDefaultName(void);

#ifndef U_HIDE_SYSTEM_API
/**
 * This function is not thread safe. DO NOT call this function when ANY ICU
 * function is being used from more than one thread! This function sets the
 * current default converter name. If this function needs to be called, it
 * should be called during application initialization. Most of the time, the
 * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
 * is sufficient for your application.
 *
 * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
 * does nothing.
 *
 * @param name the converter name to be the default (must be known by ICU).
 * @see ucnv_getDefaultName
 * @system
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_setDefaultName(const char *name);
#endif  /* U_HIDE_SYSTEM_API */

/**
 * Fixes the backslash character mismapping.  For example, in SJIS, the backslash
 * character in the ASCII portion is also used to represent the yen currency sign.
 * When mapping from Unicode character 0x005C, it's unclear whether to map the
 * character back to yen or backslash in SJIS.  This function will take the input
 * buffer and replace all the yen sign characters with backslash.  This is necessary
 * when the user tries to open a file with the input buffer on Windows.
 * This function will test the converter to see whether such mapping is
 * required.  You can sometimes avoid using this function by using the correct version
 * of Shift-JIS.
 *
 * @param cnv The converter representing the target codepage.
 * @param source the input buffer to be fixed
 * @param sourceLen the length of the input buffer
 * @see ucnv_isAmbiguous
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);

/**
 * Determines if the converter contains ambiguous mappings of the same
 * character or not.
 * @param cnv the converter to be tested
 * @return true if the converter contains ambiguous mapping of the same
 * character, false otherwise.
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
ucnv_isAmbiguous(const UConverter *cnv);

/**
 * Sets the converter to use fallback mappings or not.
 * Regardless of this flag, the converter will always use
 * fallbacks from Unicode Private Use code points, as well as
 * reverse fallbacks (to Unicode).
 * For details see ".ucm File Format"
 * in the Conversion Data chapter of the ICU User Guide:
 * https://unicode-org.github.io/icu/userguide/conversion/data.html#ucm-file-format
 *
 * @param cnv The converter to set the fallback mapping usage on.
 * @param usesFallback true if the user wants the converter to take advantage of the fallback
 * mapping, false otherwise.
 * @stable ICU 2.0
 * @see ucnv_usesFallback
 */
U_CAPI void U_EXPORT2
ucnv_setFallback(UConverter *cnv, UBool usesFallback);

/**
 * Determines if the converter uses fallback mappings or not.
 * This flag has restrictions, see ucnv_setFallback().
 *
 * @param cnv The converter to be tested
 * @return true if the converter uses fallback, false otherwise.
 * @stable ICU 2.0
 * @see ucnv_setFallback
 */
U_CAPI UBool U_EXPORT2
ucnv_usesFallback(const UConverter *cnv);

/**
 * Detects Unicode signature byte sequences at the start of the byte stream
 * and returns the charset name of the indicated Unicode charset.
 * NULL is returned when no Unicode signature is recognized.
 * The number of bytes in the signature is output as well.
 *
 * The caller can ucnv_open() a converter using the charset name.
 * The first code unit (UChar) from the start of the stream will be U+FEFF
 * (the Unicode BOM/signature character) and can usually be ignored.
 *
 * For most Unicode charsets it is also possible to ignore the indicated
 * number of initial stream bytes and start converting after them.
 * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
 * this will not work. Therefore, it is best to ignore the first output UChar
 * instead of the input signature bytes.
 * <p>
 * Usage:
 * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
 *
 * @param source            The source string in which the signature should be detected.
 * @param sourceLength      Length of the input string, or -1 if terminated with a NUL byte.
 * @param signatureLength   A pointer to int32_t to receive the number of bytes that make up the signature
 *                          of the detected UTF. 0 if not detected.
 *                          Can be a NULL pointer.
 * @param pErrorCode        ICU error code in/out parameter.
 *                          Must fulfill U_SUCCESS before the function call.
 * @return The name of the encoding detected. NULL if encoding is not detected.
 * @stable ICU 2.4
 */
U_CAPI const char* U_EXPORT2
ucnv_detectUnicodeSignature(const char* source,
                            int32_t sourceLength,
                            int32_t *signatureLength,
                            UErrorCode *pErrorCode);

/**
 * Returns the number of UChars held in the converter's internal state
 * because more input is needed for completing the conversion. This function is
 * useful for mapping semantics of ICU's converter interface to those of iconv,
 * and this information is not needed for normal conversion.
 * @param cnv       The converter in which the input is held
 * @param status    ICU error code in/out parameter.
 *                  Must fulfill U_SUCCESS before the function call.
 * @return The number of UChars in the state. -1 if an error is encountered.
 * @stable ICU 3.4
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);

/**
 * Returns the number of chars held in the converter's internal state
 * because more input is needed for completing the conversion. This function is
 * useful for mapping semantics of ICU's converter interface to those of iconv,
 * and this information is not needed for normal conversion.
 * @param cnv       The converter in which the input is held as internal state
 * @param status    ICU error code in/out parameter.
 *                  Must fulfill U_SUCCESS before the function call.
 * @return The number of chars in the state. -1 if an error is encountered.
 * @stable ICU 3.4
 */
U_CAPI int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);

/**
 * Returns whether or not the charset of the converter has a fixed number of bytes
 * per charset character.
 * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
 * Another example is UTF-32 which is always 4 bytes per character.
 * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
 * but a UTF-32 converter encodes each code point with 4 bytes.
 * Note: This method is not intended to be used to determine whether the charset has a
 * fixed ratio of bytes to Unicode codes <i>units</i> for any particular Unicode encoding form.
 * false is returned with the UErrorCode if error occurs or cnv is NULL.
 * @param cnv       The converter to be tested
 * @param status    ICU error code in/out parameter
 * @return true if the converter is fixed-width
 * @stable ICU 4.8
 */
U_CAPI UBool U_EXPORT2
ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);

#endif

#endif
/*_UCNV*/
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            // © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
***************************************************************************
* Copyright (C) 1999-2016, International Business Machines Corporation
* and others. All Rights Reserved.
***************************************************************************
*   Date        Name        Description
*   10/20/99    alan        Creation.
***************************************************************************
*/

#ifndef UNICODESET_H
#define UNICODESET_H

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

#include "unicode/ucpmap.h"
#include "unicode/unifilt.h"
#include "unicode/unistr.h"
#include "unicode/uset.h"

/**
 * \file
 * \brief C++ API: Unicode Set
 */

U_NAMESPACE_BEGIN

// Forward Declarations.
class BMPSet;
class ParsePosition;
class RBBIRuleScanner;
class SymbolTable;
class UnicodeSetStringSpan;
class UVector;
class RuleCharacterIterator;

/**
 * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
 * represent <em>character classes</em> used in regular expressions.
 * A character specifies a subset of Unicode code points.  Legal
 * code points are U+0000 to U+10FFFF, inclusive.
 *
 * <p>The UnicodeSet class is not designed to be subclassed.
 *
 * <p><code>UnicodeSet</code> supports two APIs. The first is the
 * <em>operand</em> API that allows the caller to modify the value of
 * a <code>UnicodeSet</code> object. It conforms to Java 2's
 * <code>java.util.Set</code> interface, although
 * <code>UnicodeSet</code> does not actually implement that
 * interface. All methods of <code>Set</code> are supported, with the
 * modification that they take a character range or single character
 * instead of an <code>Object</code>, and they take a
 * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
 * operand API may be thought of in terms of boolean logic: a boolean
 * OR is implemented by <code>add</code>, a boolean AND is implemented
 * by <code>retain</code>, a boolean XOR is implemented by
 * <code>complement</code> taking an argument, and a boolean NOT is
 * implemented by <code>complement</code> with no argument.  In terms
 * of traditional set theory function names, <code>add</code> is a
 * union, <code>retain</code> is an intersection, <code>remove</code>
 * is an asymmetric difference, and <code>complement</code> with no
 * argument is a set complement with respect to the superset range
 * <code>MIN_VALUE-MAX_VALUE</code>
 *
 * <p>The second API is the
 * <code>applyPattern()</code>/<code>toPattern()</code> API from the
 * <code>java.text.Format</code>-derived classes.  Unlike the
 * methods that add characters, add categories, and control the logic
 * of the set, the method <code>applyPattern()</code> sets all
 * attributes of a <code>UnicodeSet</code> at once, based on a
 * string pattern.
 *
 * <p><b>Pattern syntax</b></p>
 *
 * Patterns are accepted by the constructors and the
 * <code>applyPattern()</code> methods and returned by the
 * <code>toPattern()</code> method.  These patterns follow a syntax
 * similar to that employed by version 8 regular expression character
 * classes.  Here are some simple examples:
 *
 * \htmlonly<blockquote>\endhtmlonly
 *   <table>
 *     <tr align="top">
 *       <td nowrap valign="top" align="left"><code>[]</code></td>
 *       <td valign="top">No characters</td>
 *     </tr><tr align="top">
 *       <td nowrap valign="top" align="left"><code>[a]</code></td>
 *       <td valign="top">The character 'a'</td>
 *     </tr><tr align="top">
 *       <td nowrap valign="top" align="left"><code>[ae]</code></td>
 *       <td valign="top">The characters 'a' and 'e'</td>
 *     </tr>
 *     <tr>
 *       <td nowrap valign="top" align="left"><code>[a-e]</code></td>
 *       <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
 *       point order</td>
 *     </tr>
 *     <tr>
 *       <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
 *       <td valign="top">The character U+4E01</td>
 *     </tr>
 *     <tr>
 *       <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
 *       <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
 *       &quot;ac&quot;</td>
 *     </tr>
 *     <tr>
 *       <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
 *       <td valign="top">All characters in the general category Uppercase Letter</td>
 *     </tr>
 *   </table>
 * \htmlonly</blockquote>\endhtmlonly
 *
 * Any character may be preceded by a backslash in order to remove any special
 * meaning.  White space characters, as defined by UCharacter.isWhitespace(), are
 * ignored, unless they are escaped.
 *
 * <p>Property patterns specify a set of characters having a certain
 * property as defined by the Unicode standard.  Both the POSIX-like
 * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized.  For a
 * complete list of supported property patterns, see the User's Guide
 * for UnicodeSet at
 * <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
 * https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
 * Actual determination of property data is defined by the underlying
 * Unicode database as implemented by UCharacter.
 *
 * <p>Patterns specify individual characters, ranges of characters, and
 * Unicode property sets.  When elements are concatenated, they
 * specify their union.  To complement a set, place a '^' immediately
 * after the opening '['.  Property patterns are inverted by modifying
 * their delimiters; "[:^foo]" and "\\P{foo}".  In any other location,
 * '^' has no special meaning.
 *
 * <p>Since ICU 70, "[^...]", "[:^foo]", "\\P{foo}", and "[:binaryProperty=No:]"
 * perform a “code point complement” (all code points minus the original set),
 * removing all multicharacter strings,
 * equivalent to <code>.complement().removeAllStrings()</code>.
 * The complement() API function continues to perform a
 * symmetric difference with all code points and thus retains all multicharacter strings.
 *
 * <p>Ranges are indicated by placing two a '-' between two
 * characters, as in "a-z".  This specifies the range of all
 * characters from the left to the right, in Unicode order.  If the
 * left character is greater than or equal to the
 * right character it is a syntax error.  If a '-' occurs as the first
 * character after the opening '[' or '[^', or if it occurs as the
 * last character before the closing ']', then it is taken as a
 * literal.  Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
 * set of three characters, 'a', 'b', and '-'.
 *
 * <p>Sets may be intersected using the '&' operator or the asymmetric
 * set difference may be taken using the '-' operator, for example,
 * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
 * with values less than 4096.  Operators ('&' and '|') have equal
 * precedence and bind left-to-right.  Thus
 * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
 * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
 * difference; intersection is commutative.
 *
 * <table>
 * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
 * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
 * through 'z' and all letters in between, in Unicode order
 * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
 * all characters but 'a' through 'z',
 * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
 * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
 * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
 * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
 * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
 * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
 * <td>The asymmetric difference of sets specified by <em>pat1</em> and
 * <em>pat2</em>
 * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
 * <td>The set of characters having the specified
 * Unicode property; in
 * this case, Unicode uppercase letters
 * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
 * <td>The set of characters <em>not</em> having the given
 * Unicode property
 * </table>
 *
 * <p><b>Formal syntax</b></p>
 *
 * \htmlonly<blockquote>\endhtmlonly
 *   <table>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
 *       <td valign="top"><code>('[' '^'? item* ']') |
 *       property</code></td>
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
 *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
 *       </code></td>
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
 *       <td valign="top"><code>pattern | pattern-expr pattern |
 *       pattern-expr op pattern<br>
 *       </code></td>
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
 *       <td valign="top"><code>'&amp;' | '-'<br>
 *       </code></td>
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
 *       <td valign="top"><code>'[' | ']' | '-'<br>
 *       </code></td>
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
 *       <td valign="top"><em>any character that is not</em><code> special<br>
 *       | ('\' </code><em>any character</em><code>)<br>
 *       | ('\\u' hex hex hex hex)<br>
 *       </code></td>
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
 *       <td valign="top"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
 *       &nbsp;&nbsp;&nbsp;&nbsp;'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
 *     </tr>
 *     <tr>
 *       <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
 *       <td valign="top"><em>a Unicode property set pattern</em></td>
 *     </tr>
 *   </table>
 *   <br>
 *   <table border="1">
 *     <tr>
 *       <td>Legend: <table>
 *         <tr>
 *           <td nowrap valign="top"><code>a := b</code></td>
 *           <td width="20" valign="top">&nbsp; </td>
 *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
 *         </tr>
 *         <tr>
 *           <td nowrap valign="top"><code>a?</code></td>
 *           <td valign="top"></td>
 *           <td valign="top">zero or one instance of <code>a</code><br>
 *           </td>
 *         </tr>
 *         <tr>
 *           <td nowrap valign="top"><code>a*</code></td>
 *           <td valign="top"></td>
 *           <td valign="top">one or more instances of <code>a</code><br>
 *           </td>
 *         </tr>
 *         <tr>
 *           <td nowrap valign="top"><code>a | b</code></td>
 *           <td valign="top"></td>
 *           <td valign="top">either <code>a</code> or <code>b</code><br>
 *           </td>
 *         </tr>
 *         <tr>
 *           <td nowrap valign="top"><code>'a'</code></td>
 *           <td valign="top"></td>
 *           <td valign="top">the literal string between the quotes </td>
 *         </tr>
 *       </table>
 *       </td>
 *     </tr>
 *   </table>
 * \htmlonly</blockquote>\endhtmlonly
 * 
 * <p>Note:
 *  - Most UnicodeSet methods do not take a UErrorCode parameter because
 *   there are usually very few opportunities for failure other than a shortage
 *   of memory, error codes in low-level C++ string methods would be inconvenient,
 *   and the error code as the last parameter (ICU convention) would prevent
 *   the use of default parameter values.
 *   Instead, such methods set the UnicodeSet into a "bogus" state
 *   (see isBogus()) if an error occurs.
 *
 * @author Alan Liu
 * @stable ICU 2.0
 */
class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
private:
    /**
     * Enough for sets with few ranges.
     * For example, White_Space has 10 ranges, list length 21.
     */
    static constexpr int32_t INITIAL_CAPACITY = 25;
    // fFlags constant
    static constexpr uint8_t kIsBogus = 1;  // This set is bogus (i.e. not valid)

    UChar32* list = stackList; // MUST be terminated with HIGH
    int32_t capacity = INITIAL_CAPACITY; // capacity of list
    int32_t len = 1; // length of list used; 1 <= len <= capacity
    uint8_t fFlags = 0;         // Bit flag (see constants above)

    BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not NULL.
    UChar32* buffer = nullptr; // internal buffer, may be NULL
    int32_t bufferCapacity = 0; // capacity of buffer

    /**
     * The pattern representation of this set.  This may not be the
     * most economical pattern.  It is the pattern supplied to
     * applyPattern(), with variables substituted and whitespace
     * removed.  For sets constructed without applyPattern(), or
     * modified using the non-pattern API, this string will be empty,
     * indicating that toPattern() must generate a pattern
     * representation from the inversion list.
     */
    char16_t *pat = nullptr;
    int32_t patLen = 0;

    UVector* strings = nullptr; // maintained in sorted order
    UnicodeSetStringSpan *stringSpan = nullptr;

    /**
     * Initial list array.
     * Avoids some heap allocations, and list is never nullptr.
     * Increases the object size a bit.
     */
    UChar32 stackList[INITIAL_CAPACITY];

public:
    /**
     * Determine if this object contains a valid set.
     * A bogus set has no value. It is different from an empty set.
     * It can be used to indicate that no set value is available.
     *
     * @return true if the set is bogus/invalid, false otherwise
     * @see setToBogus()
     * @stable ICU 4.0
     */
    inline UBool isBogus(void) const;

    /**
     * Make this UnicodeSet object invalid.
     * The string will test true with isBogus().
     *
     * A bogus set has no value. It is different from an empty set.
     * It can be used to indicate that no set value is available.
     *
     * This utility function is used throughout the UnicodeSet
     * implementation to indicate that a UnicodeSet operation failed,
     * and may be used in other functions,
     * especially but not exclusively when such functions do not
     * take a UErrorCode for simplicity.
     *
     * @see isBogus()
     * @stable ICU 4.0
     */
    void setToBogus();

public:

    enum {
        /**
         * Minimum value that can be stored in a UnicodeSet.
         * @stable ICU 2.4
         */
        MIN_VALUE = 0,

        /**
         * Maximum value that can be stored in a UnicodeSet.
         * @stable ICU 2.4
         */
        MAX_VALUE = 0x10ffff
    };

    //----------------------------------------------------------------
    // Constructors &c
    //----------------------------------------------------------------

public:

    /**
     * Constructs an empty set.
     * @stable ICU 2.0
     */
    UnicodeSet();

    /**
     * Constructs a set containing the given range. If <code>end <
     * start</code> then an empty set is created.
     *
     * @param start first character, inclusive, of range
     * @param end last character, inclusive, of range
     * @stable ICU 2.4
     */
    UnicodeSet(UChar32 start, UChar32 end);

#ifndef U_HIDE_INTERNAL_API
    /**
     * @internal
     */
    enum ESerialization {
      kSerialized  /* result of serialize() */
    };

    /**
     * Constructs a set from the output of serialize().
     *
     * @param buffer the 16 bit array
     * @param bufferLen the original length returned from serialize()
     * @param serialization the value 'kSerialized'
     * @param status error code
     *
     * @internal
     */
    UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
               ESerialization serialization, UErrorCode &status);
#endif  /* U_HIDE_INTERNAL_API */

    /**
     * Constructs a set from the given pattern.  See the class
     * description for the syntax of the pattern language.
     * @param pattern a string specifying what characters are in the set
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     * contains a syntax error.
     * @stable ICU 2.0
     */
    UnicodeSet(const UnicodeString& pattern,
               UErrorCode& status);

#ifndef U_HIDE_INTERNAL_API
    /**
     * Constructs a set from the given pattern.  See the class
     * description for the syntax of the pattern language.
     * @param pattern a string specifying what characters are in the set
     * @param options bitmask for options to apply to the pattern.
     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     * @param symbols a symbol table mapping variable names to values
     * and stand-in characters to UnicodeSets; may be NULL
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     * contains a syntax error.
     * @internal
     */
    UnicodeSet(const UnicodeString& pattern,
               uint32_t options,
               const SymbolTable* symbols,
               UErrorCode& status);
#endif  /* U_HIDE_INTERNAL_API */

    /**
     * Constructs a set from the given pattern.  See the class description
     * for the syntax of the pattern language.
     * @param pattern a string specifying what characters are in the set
     * @param pos on input, the position in pattern at which to start parsing.
     * On output, the position after the last character parsed.
     * @param options bitmask for options to apply to the pattern.
     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     * @param symbols a symbol table mapping variable names to values
     * and stand-in characters to UnicodeSets; may be NULL
     * @param status input-output error code
     * @stable ICU 2.8
     */
    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
               uint32_t options,
               const SymbolTable* symbols,
               UErrorCode& status);

    /**
     * Constructs a set that is identical to the given UnicodeSet.
     * @stable ICU 2.0
     */
    UnicodeSet(const UnicodeSet& o);

    /**
     * Destructs the set.
     * @stable ICU 2.0
     */
    virtual ~UnicodeSet();

    /**
     * Assigns this object to be a copy of another.
     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    UnicodeSet& operator=(const UnicodeSet& o);

    /**
     * Compares the specified object with this set for equality.  Returns
     * <tt>true</tt> if the two sets
     * have the same size, and every member of the specified set is
     * contained in this set (or equivalently, every member of this set is
     * contained in the specified set).
     *
     * @param o set to be compared for equality with this set.
     * @return <tt>true</tt> if the specified set is equal to this set.
     * @stable ICU 2.0
     */
    virtual bool operator==(const UnicodeSet& o) const;

    /**
     * Compares the specified object with this set for equality.  Returns
     * <tt>true</tt> if the specified set is not equal to this set.
     * @stable ICU 2.0
     */
    inline bool operator!=(const UnicodeSet& o) const;

    /**
     * Returns a copy of this object.  All UnicodeFunctor objects have
     * to support cloning in order to allow classes using
     * UnicodeFunctors, such as Transliterator, to implement cloning.
     * If this set is frozen, then the clone will be frozen as well.
     * Use cloneAsThawed() for a mutable clone of a frozen set.
     * @see cloneAsThawed
     * @stable ICU 2.0
     */
    virtual UnicodeSet* clone() const override;

    /**
     * Returns the hash code value for this set.
     *
     * @return the hash code value for this set.
     * @see Object#hashCode()
     * @stable ICU 2.0
     */
    virtual int32_t hashCode(void) const;

    /**
     * Get a UnicodeSet pointer from a USet
     *
     * @param uset a USet (the ICU plain C type for UnicodeSet)
     * @return the corresponding UnicodeSet pointer.
     *
     * @stable ICU 4.2
     */
    inline static UnicodeSet *fromUSet(USet *uset);

    /**
     * Get a UnicodeSet pointer from a const USet
     *
     * @param uset a const USet (the ICU plain C type for UnicodeSet)
     * @return the corresponding UnicodeSet pointer.
     *
     * @stable ICU 4.2
     */
    inline static const UnicodeSet *fromUSet(const USet *uset);
    
    /**
     * Produce a USet * pointer for this UnicodeSet.
     * USet is the plain C type for UnicodeSet
     *
     * @return a USet pointer for this UnicodeSet
     * @stable ICU 4.2
     */
    inline USet *toUSet();


    /**
     * Produce a const USet * pointer for this UnicodeSet.
     * USet is the plain C type for UnicodeSet
     *
     * @return a const USet pointer for this UnicodeSet
     * @stable ICU 4.2
     */
    inline const USet * toUSet() const;


    //----------------------------------------------------------------
    // Freezable API
    //----------------------------------------------------------------

    /**
     * Determines whether the set has been frozen (made immutable) or not.
     * See the ICU4J Freezable interface for details.
     * @return true/false for whether the set has been frozen
     * @see freeze
     * @see cloneAsThawed
     * @stable ICU 3.8
     */
    inline UBool isFrozen() const;

    /**
     * Freeze the set (make it immutable).
     * Once frozen, it cannot be unfrozen and is therefore thread-safe
     * until it is deleted.
     * See the ICU4J Freezable interface for details.
     * Freezing the set may also make some operations faster, for example
     * contains() and span().
     * A frozen set will not be modified. (It remains frozen.)
     * @return this set.
     * @see isFrozen
     * @see cloneAsThawed
     * @stable ICU 3.8
     */
    UnicodeSet *freeze();

    /**
     * Clone the set and make the clone mutable.
     * See the ICU4J Freezable interface for details.
     * @return the mutable clone
     * @see freeze
     * @see isFrozen
     * @stable ICU 3.8
     */
    UnicodeSet *cloneAsThawed() const;

    //----------------------------------------------------------------
    // Public API
    //----------------------------------------------------------------

    /**
     * Make this object represent the range `start - end`.
     * If `start > end` then this object is set to an empty range.
     * A frozen set will not be modified.
     *
     * @param start first character in the set, inclusive
     * @param end last character in the set, inclusive
     * @stable ICU 2.4
     */
    UnicodeSet& set(UChar32 start, UChar32 end);

    /**
     * Return true if the given position, in the given pattern, appears
     * to be the start of a UnicodeSet pattern.
     * @stable ICU 2.4
     */
    static UBool resemblesPattern(const UnicodeString& pattern,
                                  int32_t pos);

    /**
     * Modifies this set to represent the set specified by the given
     * pattern, ignoring Unicode Pattern_White_Space characters.
     * See the class description for the syntax of the pattern language.
     * A frozen set will not be modified.
     * @param pattern a string specifying what characters are in the set
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     * contains a syntax error.
     * <em> Empties the set passed before applying the pattern.</em>
     * @return a reference to this
     * @stable ICU 2.0
     */
    UnicodeSet& applyPattern(const UnicodeString& pattern,
                             UErrorCode& status);

#ifndef U_HIDE_INTERNAL_API
    /**
     * Modifies this set to represent the set specified by the given
     * pattern, optionally ignoring Unicode Pattern_White_Space characters.
     * See the class description for the syntax of the pattern language.
     * A frozen set will not be modified.
     * @param pattern a string specifying what characters are in the set
     * @param options bitmask for options to apply to the pattern.
     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     * @param symbols a symbol table mapping variable names to
     * values and stand-ins to UnicodeSets; may be NULL
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     * contains a syntax error.
     *<em> Empties the set passed before applying the pattern.</em>
     * @return a reference to this
     * @internal
     */
    UnicodeSet& applyPattern(const UnicodeString& pattern,
                             uint32_t options,
                             const SymbolTable* symbols,
                             UErrorCode& status);
#endif  /* U_HIDE_INTERNAL_API */

    /**
     * Parses the given pattern, starting at the given position.  The
     * character at pattern.charAt(pos.getIndex()) must be '[', or the
     * parse fails.  Parsing continues until the corresponding closing
     * ']'.  If a syntax error is encountered between the opening and
     * closing brace, the parse fails.  Upon return from a successful
     * parse, the ParsePosition is updated to point to the character
     * following the closing ']', and a StringBuffer containing a
     * pairs list for the parsed pattern is returned.  This method calls
     * itself recursively to parse embedded subpatterns.
     *<em> Empties the set passed before applying the pattern.</em>
     * A frozen set will not be modified.
     *
     * @param pattern the string containing the pattern to be parsed.
     * The portion of the string from pos.getIndex(), which must be a
     * '[', to the corresponding closing ']', is parsed.
     * @param pos upon entry, the position at which to being parsing.
     * The character at pattern.charAt(pos.getIndex()) must be a '['.
     * Upon return from a successful parse, pos.getIndex() is either
     * the character after the closing ']' of the parsed pattern, or
     * pattern.length() if the closing ']' is the last character of
     * the pattern string.
     * @param options bitmask for options to apply to the pattern.
     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     * @param symbols a symbol table mapping variable names to
     * values and stand-ins to UnicodeSets; may be NULL
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     * contains a syntax error.
     * @return a reference to this
     * @stable ICU 2.8
     */
    UnicodeSet& applyPattern(const UnicodeString& pattern,
                             ParsePosition& pos,
                             uint32_t options,
                             const SymbolTable* symbols,
                             UErrorCode& status);

    /**
     * Returns a string representation of this set.  If the result of
     * calling this function is passed to a UnicodeSet constructor, it
     * will produce another set that is equal to this one.
     * A frozen set will not be modified.
     * @param result the string to receive the rules.  Previous
     * contents will be deleted.
     * @param escapeUnprintable if true then convert unprintable
     * character to their hex escape representations, \\uxxxx or
     * \\Uxxxxxxxx.  Unprintable characters are those other than
     * U+000A, U+0020..U+007E.
     * @stable ICU 2.0
     */
    virtual UnicodeString& toPattern(UnicodeString& result,
                                     UBool escapeUnprintable = false) const override;

    /**
     * Modifies this set to contain those code points which have the given value
     * for the given binary or enumerated property, as returned by
     * u_getIntPropertyValue.  Prior contents of this set are lost.
     * A frozen set will not be modified.
     *
     * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
     * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
     * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
     *
     * @param value a value in the range u_getIntPropertyMinValue(prop)..
     * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
     * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
     * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
     * categories such as [:L:] to be represented.
     *
     * @param ec error code input/output parameter
     *
     * @return a reference to this set
     *
     * @stable ICU 2.4
     */
    UnicodeSet& applyIntPropertyValue(UProperty prop,
                                      int32_t value,
                                      UErrorCode& ec);

    /**
     * Modifies this set to contain those code points which have the
     * given value for the given property.  Prior contents of this
     * set are lost.
     * A frozen set will not be modified.
     *
     * @param prop a property alias, either short or long.  The name is matched
     * loosely.  See PropertyAliases.txt for names and a description of loose
     * matching.  If the value string is empty, then this string is interpreted
     * as either a General_Category value alias, a Script value alias, a binary
     * property alias, or a special ID.  Special IDs are matched loosely and
     * correspond to the following sets:
     *
     * "ANY" = [\\u0000-\\U0010FFFF],
     * "ASCII" = [\\u0000-\\u007F],
     * "Assigned" = [:^Cn:].
     *
     * @param value a value alias, either short or long.  The name is matched
     * loosely.  See PropertyValueAliases.txt for names and a description of
     * loose matching.  In addition to aliases listed, numeric values and
     * canonical combining classes may be expressed numerically, e.g., ("nv",
     * "0.5") or ("ccc", "220").  The value string may also be empty.
     *
     * @param ec error code input/output parameter
     *
     * @return a reference to this set
     *
     * @stable ICU 2.4
     */
    UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
                                   const UnicodeString& value,
                                   UErrorCode& ec);

    /**
     * Returns the number of elements in this set (its cardinality).
     * Note than the elements of a set may include both individual
     * codepoints and strings.
     *
     * This is slower than getRangeCount() because
     * it counts the code points of all ranges.
     *
     * @return the number of elements in this set (its cardinality).
     * @stable ICU 2.0
     * @see getRangeCount
     */
    virtual int32_t size(void) const;

    /**
     * Returns <tt>true</tt> if this set contains no elements.
     *
     * @return <tt>true</tt> if this set contains no elements.
     * @stable ICU 2.0
     */
    virtual UBool isEmpty(void) const;

    /**
     * @return true if this set contains multi-character strings or the empty string.
     * @stable ICU 70
     */
    UBool hasStrings() const;

    /**
     * Returns true if this set contains the given character.
     * This function works faster with a frozen set.
     * @param c character to be checked for containment
     * @return true if the test condition is met
     * @stable ICU 2.0
     */
    virtual UBool contains(UChar32 c) const override;

    /**
     * Returns true if this set contains every character
     * of the given range.
     * @param start first character, inclusive, of the range
     * @param end last character, inclusive, of the range
     * @return true if the test condition is met
     * @stable ICU 2.0
     */
    virtual UBool contains(UChar32 start, UChar32 end) const;

    /**
     * Returns <tt>true</tt> if this set contains the given
     * multicharacter string.
     * @param s string to be checked for containment
     * @return <tt>true</tt> if this set contains the specified string
     * @stable ICU 2.4
     */
    UBool contains(const UnicodeString& s) const;

    /**
     * Returns true if this set contains all the characters and strings
     * of the given set.
     * @param c set to be checked for containment
     * @return true if the test condition is met
     * @stable ICU 2.4
     */
    virtual UBool containsAll(const UnicodeSet& c) const;

    /**
     * Returns true if this set contains all the characters
     * of the given string.
     * @param s string containing characters to be checked for containment
     * @return true if the test condition is met
     * @stable ICU 2.4
     */
    UBool containsAll(const UnicodeString& s) const;

    /**
     * Returns true if this set contains none of the characters
     * of the given range.
     * @param start first character, inclusive, of the range
     * @param end last character, inclusive, of the range
     * @return true if the test condition is met
     * @stable ICU 2.4
     */
    UBool containsNone(UChar32 start, UChar32 end) const;

    /**
     * Returns true if this set contains none of the characters and strings
     * of the given set.
     * @param c set to be checked for containment
     * @return true if the test condition is met
     * @stable ICU 2.4
     */
    UBool containsNone(const UnicodeSet& c) const;

    /**
     * Returns true if this set contains none of the characters
     * of the given string.
     * @param s string containing characters to be checked for containment
     * @return true if the test condition is met
     * @stable ICU 2.4
     */
    UBool containsNone(const UnicodeString& s) const;

    /**
     * Returns true if this set contains one or more of the characters
     * in the given range.
     * @param start first character, inclusive, of the range
     * @param end last character, inclusive, of the range
     * @return true if the condition is met
     * @stable ICU 2.4
     */
    inline UBool containsSome(UChar32 start, UChar32 end) const;

    /**
     * Returns true if this set contains one or more of the characters
     * and strings of the given set.
     * @param s The set to be checked for containment
     * @return true if the condition is met
     * @stable ICU 2.4
     */
    inline UBool containsSome(const UnicodeSet& s) const;

    /**
     * Returns true if this set contains one or more of the characters
     * of the given string.
     * @param s string containing characters to be checked for containment
     * @return true if the condition is met
     * @stable ICU 2.4
     */
    inline UBool containsSome(const UnicodeString& s) const;

    /**
     * Returns the length of the initial substring of the input string which
     * consists only of characters and strings that are contained in this set
     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     * or only of characters and strings that are not contained
     * in this set (USET_SPAN_NOT_CONTAINED).
     * See USetSpanCondition for details.
     * Similar to the strspn() C library function.
     * Unpaired surrogates are treated according to contains() of their surrogate code points.
     * This function works faster with a frozen set and with a non-negative string length argument.
     * @param s start of the string
     * @param length of the string; can be -1 for NUL-terminated
     * @param spanCondition specifies the containment condition
     * @return the length of the initial substring according to the spanCondition;
     *         0 if the start of the string does not fit the spanCondition
     * @stable ICU 3.8
     * @see USetSpanCondition
     */
    int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;

    /**
     * Returns the end of the substring of the input string according to the USetSpanCondition.
     * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
     * after pinning start to 0<=start<=s.length().
     * @param s the string
     * @param start the start index in the string for the span operation
     * @param spanCondition specifies the containment condition
     * @return the exclusive end of the substring according to the spanCondition;
     *         the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
     * @stable ICU 4.4
     * @see USetSpanCondition
     */
    inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;

    /**
     * Returns the start of the trailing substring of the input string which
     * consists only of characters and strings that are contained in this set
     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     * or only of characters and strings that are not contained
     * in this set (USET_SPAN_NOT_CONTAINED).
     * See USetSpanCondition for details.
     * Unpaired surrogates are treated according to contains() of their surrogate code points.
     * This function works faster with a frozen set and with a non-negative string length argument.
     * @param s start of the string
     * @param length of the string; can be -1 for NUL-terminated
     * @param spanCondition specifies the containment condition
     * @return the start of the trailing substring according to the spanCondition;
     *         the string length if the end of the string does not fit the spanCondition
     * @stable ICU 3.8
     * @see USetSpanCondition
     */
    int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;

    /**
     * Returns the start of the substring of the input string according to the USetSpanCondition.
     * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
     * after pinning limit to 0<=end<=s.length().
     * @param s the string
     * @param limit the exclusive-end index in the string for the span operation
     *              (use s.length() or INT32_MAX for spanning back from the end of the string)
     * @param spanCondition specifies the containment condition
     * @return the start of the substring according to the spanCondition;
     *         the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
     * @stable ICU 4.4
     * @see USetSpanCondition
     */
    inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;

    /**
     * Returns the length of the initial substring of the input string which
     * consists only of characters and strings that are contained in this set
     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     * or only of characters and strings that are not contained
     * in this set (USET_SPAN_NOT_CONTAINED).
     * See USetSpanCondition for details.
     * Similar to the strspn() C library function.
     * Malformed byte sequences are treated according to contains(0xfffd).
     * This function works faster with a frozen set and with a non-negative string length argument.
     * @param s start of the string (UTF-8)
     * @param length of the string; can be -1 for NUL-terminated
     * @param spanCondition specifies the containment condition
     * @return the length of the initial substring according to the spanCondition;
     *         0 if the start of the string does not fit the spanCondition
     * @stable ICU 3.8
     * @see USetSpanCondition
     */
    int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;

    /**
     * Returns the start of the trailing substring of the input string which
     * consists only of characters and strings that are contained in this set
     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     * or only of characters and strings that are not contained
     * in this set (USET_SPAN_NOT_CONTAINED).
     * See USetSpanCondition for details.
     * Malformed byte sequences are treated according to contains(0xfffd).
     * This function works faster with a frozen set and with a non-negative string length argument.
     * @param s start of the string (UTF-8)
     * @param length of the string; can be -1 for NUL-terminated
     * @param spanCondition specifies the containment condition
     * @return the start of the trailing substring according to the spanCondition;
     *         the string length if the end of the string does not fit the spanCondition
     * @stable ICU 3.8
     * @see USetSpanCondition
     */
    int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;

    /**
     * Implement UnicodeMatcher::matches()
     * @stable ICU 2.4
     */
    virtual UMatchDegree matches(const Replaceable& text,
                         int32_t& offset,
                         int32_t limit,
                         UBool incremental) override;

private:
    /**
     * Returns the longest match for s in text at the given position.
     * If limit > start then match forward from start+1 to limit
     * matching all characters except s.charAt(0).  If limit < start,
     * go backward starting from start-1 matching all characters
     * except s.charAt(s.length()-1).  This method assumes that the
     * first character, text.charAt(start), matches s, so it does not
     * check it.
     * @param text the text to match
     * @param start the first character to match.  In the forward
     * direction, text.charAt(start) is matched against s.charAt(0).
     * In the reverse direction, it is matched against
     * s.charAt(s.length()-1).
     * @param limit the limit offset for matching, either last+1 in
     * the forward direction, or last-1 in the reverse direction,
     * where last is the index of the last character to match.
     * @param s
     * @return If part of s matches up to the limit, return |limit -
     * start|.  If all of s matches before reaching the limit, return
     * s.length().  If there is a mismatch between s and text, return
     * 0
     */
    static int32_t matchRest(const Replaceable& text,
                             int32_t start, int32_t limit,
                             const UnicodeString& s);

    /**
     * Returns the smallest value i such that c < list[i].  Caller
     * must ensure that c is a legal value or this method will enter
     * an infinite loop.  This method performs a binary search.
     * @param c a character in the range MIN_VALUE..MAX_VALUE
     * inclusive
     * @return the smallest integer i in the range 0..len-1,
     * inclusive, such that c < list[i]
     */
    int32_t findCodePoint(UChar32 c) const;

public:

    /**
     * Implementation of UnicodeMatcher API.  Union the set of all
     * characters that may be matched by this object into the given
     * set.
     * @param toUnionTo the set into which to union the source characters
     * @stable ICU 2.4
     */
    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override;

    /**
     * Returns the index of the given character within this set, where
     * the set is ordered by ascending code point.  If the character
     * is not in this set, return -1.  The inverse of this method is
     * <code>charAt()</code>.
     * @return an index from 0..size()-1, or -1
     * @stable ICU 2.4
     */
    int32_t indexOf(UChar32 c) const;

    /**
     * Returns the character at the given index within this set, where
     * the set is ordered by ascending code point.  If the index is
     * out of range for characters, returns (UChar32)-1.
     * The inverse of this method is <code>indexOf()</code>.
     *
     * For iteration, this is slower than UnicodeSetIterator or
     * getRangeCount()/getRangeStart()/getRangeEnd(),
     * because for each call it skips linearly over <code>index</code>
     * characters in the ranges.
     *
     * @param index an index from 0..size()-1
     * @return the character at the given index, or (UChar32)-1.
     * @stable ICU 2.4
     */
    UChar32 charAt(int32_t index) const;

    /**
     * Adds the specified range to this set if it is not already
     * present.  If this set already contains the specified range,
     * the call leaves this set unchanged.  If <code>start > end</code>
     * then an empty range is added, leaving the set unchanged.
     * This is equivalent to a boolean logic OR, or a set UNION.
     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range to be added
     * to this set.
     * @param end last character, inclusive, of range to be added
     * to this set.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& add(UChar32 start, UChar32 end);

    /**
     * Adds the specified character to this set if it is not already
     * present.  If this set already contains the specified character,
     * the call leaves this set unchanged.
     * A frozen set will not be modified.
     *
     * @param c the character (code point)
     * @return this object, for chaining
     * @stable ICU 2.0
     */
    UnicodeSet& add(UChar32 c);

    /**
     * Adds the specified multicharacter to this set if it is not already
     * present.  If this set already contains the multicharacter,
     * the call leaves this set unchanged.
     * Thus "ch" => {"ch"}
     * A frozen set will not be modified.
     *
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& add(const UnicodeString& s);

 private:
    /**
     * @return a code point IF the string consists of a single one.
     * otherwise returns -1.
     * @param s string to test
     */
    static int32_t getSingleCP(const UnicodeString& s);

    void _add(const UnicodeString& s);

 public:
    /**
     * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
     * If this set already contains any particular character, it has no effect on that character.
     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& addAll(const UnicodeString& s);

    /**
     * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& retainAll(const UnicodeString& s);

    /**
     * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& complementAll(const UnicodeString& s);

    /**
     * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& removeAll(const UnicodeString& s);

    /**
     * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
     *
     * @param s the source string
     * @return a newly created set containing the given string.
     * The caller owns the return object and is responsible for deleting it.
     * @stable ICU 2.4
     */
    static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);


    /**
     * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
     * @param s the source string
     * @return a newly created set containing the given characters
     * The caller owns the return object and is responsible for deleting it.
     * @stable ICU 2.4
     */
    static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);

    /**
     * Retain only the elements in this set that are contained in the
     * specified range.  If <code>start > end</code> then an empty range is
     * retained, leaving the set empty.  This is equivalent to
     * a boolean logic AND, or a set INTERSECTION.
     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range
     * @param end last character, inclusive, of range
     * @stable ICU 2.0
     */
    virtual UnicodeSet& retain(UChar32 start, UChar32 end);


    /**
     * Retain the specified character from this set if it is present.
     * A frozen set will not be modified.
     *
     * @param c the character (code point)
     * @return this object, for chaining
     * @stable ICU 2.0
     */
    UnicodeSet& retain(UChar32 c);

    /**
     * Retains only the specified string from this set if it is present.
     * Upon return this set will be empty if it did not contain s, or
     * will only contain s if it did contain s.
     * A frozen set will not be modified.
     *
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 69
     */
    UnicodeSet& retain(const UnicodeString &s);

    /**
     * Removes the specified range from this set if it is present.
     * The set will not contain the specified range once the call
     * returns.  If <code>start > end</code> then an empty range is
     * removed, leaving the set unchanged.
     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range to be removed
     * from this set.
     * @param end last character, inclusive, of range to be removed
     * from this set.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& remove(UChar32 start, UChar32 end);

    /**
     * Removes the specified character from this set if it is present.
     * The set will not contain the specified range once the call
     * returns.
     * A frozen set will not be modified.
     *
     * @param c the character (code point)
     * @return this object, for chaining
     * @stable ICU 2.0
     */
    UnicodeSet& remove(UChar32 c);

    /**
     * Removes the specified string from this set if it is present.
     * The set will not contain the specified character once the call
     * returns.
     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& remove(const UnicodeString& s);

    /**
     * This is equivalent to
     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
     *
     * <strong>Note:</strong> This performs a symmetric difference with all code points
     * <em>and thus retains all multicharacter strings</em>.
     * In order to achieve a “code point complement” (all code points minus this set),
     * the easiest is to <code>.complement().removeAllStrings()</code>.
     *
     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& complement();

    /**
     * Complements the specified range in this set.  Any character in
     * the range will be removed if it is in this set, or will be
     * added if it is not in this set.  If <code>start > end</code>
     * then an empty range is complemented, leaving the set unchanged.
     * This is equivalent to a boolean logic XOR.
     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range
     * @param end last character, inclusive, of range
     * @stable ICU 2.0
     */
    virtual UnicodeSet& complement(UChar32 start, UChar32 end);

    /**
     * Complements the specified character in this set.  The character
     * will be removed if it is in this set, or will be added if it is
     * not in this set.
     * A frozen set will not be modified.
     *
     * @param c the character (code point)
     * @return this object, for chaining
     * @stable ICU 2.0
     */
    UnicodeSet& complement(UChar32 c);

    /**
     * Complement the specified string in this set.
     * The string will be removed if it is in this set, or will be added if it is not in this set.
     * A frozen set will not be modified.
     *
     * @param s the string to complement
     * @return this object, for chaining
     * @stable ICU 2.4
     */
    UnicodeSet& complement(const UnicodeString& s);

    /**
     * Adds all of the elements in the specified set to this set if
     * they're not already present.  This operation effectively
     * modifies this set so that its value is the <i>union</i> of the two
     * sets.  The behavior of this operation is unspecified if the specified
     * collection is modified while the operation is in progress.
     * A frozen set will not be modified.
     *
     * @param c set whose elements are to be added to this set.
     * @see #add(UChar32, UChar32)
     * @stable ICU 2.0
     */
    virtual UnicodeSet& addAll(const UnicodeSet& c);

    /**
     * Retains only the elements in this set that are contained in the
     * specified set.  In other words, removes from this set all of
     * its elements that are not contained in the specified set.  This
     * operation effectively modifies this set so that its value is
     * the <i>intersection</i> of the two sets.
     * A frozen set will not be modified.
     *
     * @param c set that defines which elements this set will retain.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& retainAll(const UnicodeSet& c);

    /**
     * Removes from this set all of its elements that are contained in the
     * specified set.  This operation effectively modifies this
     * set so that its value is the <i>asymmetric set difference</i> of
     * the two sets.
     * A frozen set will not be modified.
     *
     * @param c set that defines which elements will be removed from
     *          this set.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& removeAll(const UnicodeSet& c);

    /**
     * Complements in this set all elements contained in the specified
     * set.  Any character in the other set will be removed if it is
     * in this set, or will be added if it is not in this set.
     * A frozen set will not be modified.
     *
     * @param c set that defines which elements will be xor'ed from
     *          this set.
     * @stable ICU 2.4
     */
    virtual UnicodeSet& complementAll(const UnicodeSet& c);

    /**
     * Removes all of the elements from this set.  This set will be
     * empty after this call returns.
     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& clear(void);

    /**
     * Close this set over the given attribute.  For the attribute
     * USET_CASE, the result is to modify this set so that:
     *
     * 1. For each character or string 'a' in this set, all strings or
     * characters 'b' such that foldCase(a) == foldCase(b) are added
     * to this set.
     *
     * 2. For each string 'e' in the resulting set, if e !=
     * foldCase(e), 'e' will be removed.
     *
     * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
     *
     * (Here foldCase(x) refers to the operation u_strFoldCase, and a
     * == b denotes that the contents are the same, not pointer
     * comparison.)
     *
     * A frozen set will not be modified.
     *
     * @param attribute bitmask for attributes to close over.
     * Currently only the USET_CASE bit is supported.  Any undefined bits
     * are ignored.
     * @return a reference to this set.
     * @stable ICU 4.2
     */
    UnicodeSet& closeOver(int32_t attribute);

    /**
     * Remove all strings from this set.
     *
     * @return a reference to this set.
     * @stable ICU 4.2
     */
    virtual UnicodeSet &removeAllStrings();

    /**
     * Iteration method that returns the number of ranges contained in
     * this set.
     * @see #getRangeStart
     * @see #getRangeEnd
     * @stable ICU 2.4
     */
    virtual int32_t getRangeCount(void) const;

    /**
     * Iteration method that returns the first character in the
     * specified range of this set.
     * @see #getRangeCount
     * @see #getRangeEnd
     * @stable ICU 2.4
     */
    virtual UChar32 getRangeStart(int32_t index) const;

    /**
     * Iteration method that returns the last character in the
     * specified range of this set.
     * @see #getRangeStart
     * @see #getRangeEnd
     * @stable ICU 2.4
     */
    virtual UChar32 getRangeEnd(int32_t index) const;

    /**
     * Serializes this set into an array of 16-bit integers.  Serialization
     * (currently) only records the characters in the set; multicharacter
     * strings are ignored.
     *
     * The array has following format (each line is one 16-bit
     * integer):
     *
     *  length     = (n+2*m) | (m!=0?0x8000:0)
     *  bmpLength  = n; present if m!=0
     *  bmp[0]
     *  bmp[1]
     *  ...
     *  bmp[n-1]
     *  supp-high[0]
     *  supp-low[0]
     *  supp-high[1]
     *  supp-low[1]
     *  ...
     *  supp-high[m-1]
     *  supp-low[m-1]
     *
     * The array starts with a header.  After the header are n bmp
     * code points, then m supplementary code points.  Either n or m
     * or both may be zero.  n+2*m is always <= 0x7FFF.
     *
     * If there are no supplementary characters (if m==0) then the
     * header is one 16-bit integer, 'length', with value n.
     *
     * If there are supplementary characters (if m!=0) then the header
     * is two 16-bit integers.  The first, 'length', has value
     * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
     *
     * After the header the code points are stored in ascending order.
     * Supplementary code points are stored as most significant 16
     * bits followed by least significant 16 bits.
     *
     * @param dest pointer to buffer of destCapacity 16-bit integers.
     * May be NULL only if destCapacity is zero.
     * @param destCapacity size of dest, or zero.  Must not be negative.
     * @param ec error code.  Will be set to U_INDEX_OUTOFBOUNDS_ERROR
     * if n+2*m > 0x7FFF.  Will be set to U_BUFFER_OVERFLOW_ERROR if
     * n+2*m+(m!=0?2:1) > destCapacity.
     * @return the total length of the serialized format, including
     * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
     * than U_BUFFER_OVERFLOW_ERROR.
     * @stable ICU 2.4
     */
    int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;

    /**
     * Reallocate this objects internal structures to take up the least
     * possible space, without changing this object's value.
     * A frozen set will not be modified.
     * @stable ICU 2.4
     */
    virtual UnicodeSet& compact();

    /**
     * Return the class ID for this class.  This is useful only for
     * comparing to a return value from getDynamicClassID().  For example:
     * <pre>
     * .      Base* polymorphic_pointer = createPolymorphicObject();
     * .      if (polymorphic_pointer->getDynamicClassID() ==
     * .          Derived::getStaticClassID()) ...
     * </pre>
     * @return          The class ID for all objects of this class.
     * @stable ICU 2.0
     */
    static UClassID U_EXPORT2 getStaticClassID(void);

    /**
     * Implement UnicodeFunctor API.
     *
     * @return The class ID for this object. All objects of a given
     * class have the same class ID.  Objects of other classes have
     * different class IDs.
     * @stable ICU 2.4
     */
    virtual UClassID getDynamicClassID(void) const override;

private:

    // Private API for the USet API

    friend class USetAccess;

    const UnicodeString* getString(int32_t index) const;

    //----------------------------------------------------------------
    // RuleBasedTransliterator support
    //----------------------------------------------------------------

private:

    /**
     * Returns <tt>true</tt> if this set contains any character whose low byte
     * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
     * indexing.
     */
    virtual UBool matchesIndexValue(uint8_t v) const override;

private:
    friend class RBBIRuleScanner;

    //----------------------------------------------------------------
    // Implementation: Clone as thawed (see ICU4J Freezable)
    //----------------------------------------------------------------

    UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
    UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);

    //----------------------------------------------------------------
    // Implementation: Pattern parsing
    //----------------------------------------------------------------

    void applyPatternIgnoreSpace(const UnicodeString& pattern,
                                 ParsePosition& pos,
                                 const SymbolTable* symbols,
                                 UErrorCode& status);

    void applyPattern(RuleCharacterIterator& chars,
                      const SymbolTable* symbols,
                      UnicodeString& rebuiltPat,
                      uint32_t options,
                      UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                      int32_t depth,
                      UErrorCode& ec);

    //----------------------------------------------------------------
    // Implementation: Utility methods
    //----------------------------------------------------------------

    static int32_t nextCapacity(int32_t minCapacity);

    bool ensureCapacity(int32_t newLen);

    bool ensureBufferCapacity(int32_t newLen);

    void swapBuffers(void);

    UBool allocateStrings(UErrorCode &status);
    int32_t stringsSize() const;
    UBool stringsContains(const UnicodeString &s) const;

    UnicodeString& _toPattern(UnicodeString& result,
                              UBool escapeUnprintable) const;

    UnicodeString& _generatePattern(UnicodeString& result,
                                    UBool escapeUnprintable) const;

    static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);

    static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);

    static void _appendToPat(UnicodeString &result, UChar32 start, UChar32 end,
                             UBool escapeUnprintable);

    //----------------------------------------------------------------
    // Implementation: Fundamental operators
    //----------------------------------------------------------------

    void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);

    void add(const UChar32* other, int32_t otherLen, int8_t polarity);

    void retain(const UChar32* other, int32_t otherLen, int8_t polarity);

    /**
     * Return true if the given position, in the given pattern, appears
     * to be the start of a property set pattern [:foo:], \\p{foo}, or
     * \\P{foo}, or \\N{name}.
     */
    static UBool resemblesPropertyPattern(const UnicodeString& pattern,
                                          int32_t pos);

    static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
                                          int32_t iterOpts);

    /**
     * Parse the given property pattern at the given parse position
     * and set this UnicodeSet to the result.
     *
     * The original design document is out of date, but still useful.
     * Ignore the property and value names:
     * https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/unicodeset_properties.html
     *
     * Recognized syntax:
     *
     * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
     * \\p{foo} \\P{foo}  - white space not allowed within "\\p" or "\\P"
     * \\N{name}         - white space not allowed within "\\N"
     *
     * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
     * Case is ignored except in "\\p" and "\\P" and "\\N".  In 'name' leading
     * and trailing space is deleted, and internal runs of whitespace
     * are collapsed to a single space.
     *
     * We support binary properties, enumerated properties, and the
     * following non-enumerated properties:
     *
     *  Numeric_Value
     *  Name
     *  Unicode_1_Name
     *
     * @param pattern the pattern string
     * @param ppos on entry, the position at which to begin parsing.
     * This should be one of the locations marked '^':
     *
     *   [:blah:]     \\p{blah}     \\P{blah}     \\N{name}
     *   ^       %    ^       %    ^       %    ^       %
     *
     * On return, the position after the last character parsed, that is,
     * the locations marked '%'.  If the parse fails, ppos is returned
     * unchanged.
     * @param ec status
     * @return a reference to this.
     */
    UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
                                     ParsePosition& ppos,
                                     UErrorCode &ec);

    void applyPropertyPattern(RuleCharacterIterator& chars,
                              UnicodeString& rebuiltPat,
                              UErrorCode& ec);

    /**
     * A filter that returns true if the given code point should be
     * included in the UnicodeSet being constructed.
     */
    typedef UBool (*Filter)(UChar32 codePoint, void* context);

    /**
     * Given a filter, set this UnicodeSet to the code points
     * contained by that filter.  The filter MUST be
     * property-conformant.  That is, if it returns value v for one
     * code point, then it must return v for all affiliated code
     * points, as defined by the inclusions list.  See
     * getInclusions().
     * src is a UPropertySource value.
     */
    void applyFilter(Filter filter,
                     void* context,
                     const UnicodeSet* inclusions,
                     UErrorCode &status);

    /**
     * Set the new pattern to cache.
     */
    void setPattern(const UnicodeString& newPat) {
        setPattern(newPat.getBuffer(), newPat.length());
    }
    void setPattern(const char16_t *newPat, int32_t newPatLen);
    /**
     * Release existing cached pattern.
     */
    void releasePattern();

    friend class UnicodeSetIterator;
};



inline bool UnicodeSet::operator!=(const UnicodeSet& o) const {
    return !operator==(o);
}

inline UBool UnicodeSet::isFrozen() const {
    return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
}

inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
    return !containsNone(start, end);
}

inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
    return !containsNone(s);
}

inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
    return !containsNone(s);
}

inline UBool UnicodeSet::isBogus() const {
    return (UBool)(fFlags & kIsBogus);
}

inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
    return reinterpret_cast<UnicodeSet *>(uset);
}

inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
    return reinterpret_cast<const UnicodeSet *>(uset);
}

inline USet *UnicodeSet::toUSet() {
    return reinterpret_cast<USet *>(this);
}

inline const USet *UnicodeSet::toUSet() const {
    return reinterpret_cast<const USet *>(this);
}

inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
    int32_t sLength=s.length();
    if(start<0) {
        start=0;
    } else if(start>sLength) {
        start=sLength;
    }
    return start+span(s.getBuffer()+start, sLength-start, spanCondition);
}

inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
    int32_t sLength=s.length();
    if(limit<0) {
        limit=0;
    } else if(limit>sLength) {
        limit=sLength;
    }
    return spanBack(s.getBuffer(), limit, spanCondition);
}

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */

#endif
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      // © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1998-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File unistr.h
*
* Modification History:
*
*   Date        Name        Description
*   09/25/98    stephen     Creation.
*   11/11/98    stephen     Changed per 11/9 code review.
*   04/20/99    stephen     Overhauled per 4/16 code review.
*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
*                           handleReplaceBetween(); other methods unchanged.
*   06/25/01    grhoten     Remove dependency on iostream.
******************************************************************************
*/

#ifndef UNISTR_H
#define UNISTR_H

/**
 * \file
 * \brief C++ API: Unicode String
 */

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

#include <cstddef>
#include "unicode/char16ptr.h"
#include "unicode/rep.h"
#include "unicode/std_string.h"
#include "unicode/stringpiece.h"
#include "unicode/bytestream.h"

struct UConverter;          // unicode/ucnv.h

#ifndef USTRING_H
/**
 * \ingroup ustring_ustrlen
 * @param s Pointer to sequence of UChars.
 * @return Length of sequence.
 */
U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
#endif

U_NAMESPACE_BEGIN

#if !UCONFIG_NO_BREAK_ITERATION
class BreakIterator;        // unicode/brkiter.h
#endif
class Edits;

U_NAMESPACE_END

// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
/**
 * Internal string case mapping function type.
 * All error checking must be done.
 * src and dest must not overlap.
 * @internal
 */
typedef int32_t U_CALLCONV
UStringCaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
                  icu::BreakIterator *iter,
#endif
                  char16_t *dest, int32_t destCapacity,
                  const char16_t *src, int32_t srcLength,
                  icu::Edits *edits,
                  UErrorCode &errorCode);

U_NAMESPACE_BEGIN

class Locale;               // unicode/locid.h
class StringCharacterIterator;
class UnicodeStringAppendable;  // unicode/appendable.h

/* The <iostream> include has been moved to unicode/ustream.h */

/**
 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
 * which constructs a Unicode string from an invariant-character char * string.
 * About invariant characters see utypes.h.
 * This constructor has no runtime dependency on conversion code and is
 * therefore recommended over ones taking a charset name string
 * (where the empty string "" indicates invariant-character conversion).
 *
 * @stable ICU 3.2
 */
#define US_INV icu::UnicodeString::kInvariant

/**
 * Unicode String literals in C++.
 *
 * Note: these macros are not recommended for new code.
 * Prior to the availability of C++11 and u"unicode string literals",
 * these macros were provided for portability and efficiency when
 * initializing UnicodeStrings from literals.
 *
 * They work only for strings that contain "invariant characters", i.e.,
 * only latin letters, digits, and some punctuation.
 * See utypes.h for details.
 *
 * The string parameter must be a C string literal.
 * The length of the string, not including the terminating
 * `NUL`, must be specified as a constant.
 * @stable ICU 2.0
 */
#if !U_CHAR16_IS_TYPEDEF
# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
#else
# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
#endif

/**
 * Unicode String literals in C++.
 * Dependent on the platform properties, different UnicodeString
 * constructors should be used to create a UnicodeString object from
 * a string literal.
 * The macros are defined for improved performance.
 * They work only for strings that contain "invariant characters", i.e.,
 * only latin letters, digits, and some punctuation.
 * See utypes.h for details.
 *
 * The string parameter must be a C string literal.
 * @stable ICU 2.0
 */
#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)

/**
 * \def UNISTR_FROM_CHAR_EXPLICIT
 * This can be defined to be empty or "explicit".
 * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
 * constructors are marked as explicit, preventing their inadvertent use.
 * @stable ICU 49
 */
#ifndef UNISTR_FROM_CHAR_EXPLICIT
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    // Auto-"explicit" in ICU library code.
#   define UNISTR_FROM_CHAR_EXPLICIT explicit
# else
    // Empty by default for source code compatibility.
#   define UNISTR_FROM_CHAR_EXPLICIT
# endif
#endif

/**
 * \def UNISTR_FROM_STRING_EXPLICIT
 * This can be defined to be empty or "explicit".
 * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
 * constructors are marked as explicit, preventing their inadvertent use.
 *
 * In particular, this helps prevent accidentally depending on ICU conversion code
 * by passing a string literal into an API with a const UnicodeString & parameter.
 * @stable ICU 49
 */
#ifndef UNISTR_FROM_STRING_EXPLICIT
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    // Auto-"explicit" in ICU library code.
#   define UNISTR_FROM_STRING_EXPLICIT explicit
# else
    // Empty by default for source code compatibility.
#   define UNISTR_FROM_STRING_EXPLICIT
# endif
#endif

/**
 * \def UNISTR_OBJECT_SIZE
 * Desired sizeof(UnicodeString) in bytes.
 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
 * The object size may want to be a multiple of 16 bytes,
 * which is a common granularity for heap allocation.
 *
 * Any space inside the object beyond sizeof(vtable pointer) + 2
 * is available for storing short strings inside the object.
 * The bigger the object, the longer a string that can be stored inside the object,
 * without additional heap allocation.
 *
 * Depending on a platform's pointer size, pointer alignment requirements,
 * and struct padding, the compiler will usually round up sizeof(UnicodeString)
 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
 * to hold the fields for heap-allocated strings.
 * Such a minimum size also ensures that the object is easily large enough
 * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
 *
 * sizeof(UnicodeString) >= 48 should work for all known platforms.
 *
 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
 * sizeof(UnicodeString) = 64 would leave space for
 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
 * char16_ts stored inside the object.
 *
 * The minimum object size on a 64-bit machine would be
 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
 * and the internal buffer would hold up to 11 char16_ts in that case.
 *
 * @see U16_MAX_LENGTH
 * @stable ICU 56
 */
#ifndef UNISTR_OBJECT_SIZE
# define UNISTR_OBJECT_SIZE 64
#endif

/**
 * UnicodeString is a string class that stores Unicode characters directly and provides
 * similar functionality as the Java String and StringBuffer/StringBuilder classes.
 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
 *
 * The UnicodeString equivalent of std::string’s clear() is remove().
 *
 * A UnicodeString may "alias" an external array of characters
 * (that is, point to it, rather than own the array)
 * whose lifetime must then at least match the lifetime of the aliasing object.
 * This aliasing may be preserved when returning a UnicodeString by value,
 * depending on the compiler and the function implementation,
 * via Return Value Optimization (RVO) or the move assignment operator.
 * (However, the copy assignment operator does not preserve aliasing.)
 * For details see the description of storage models at the end of the class API docs
 * and in the User Guide chapter linked from there.
 *
 * The UnicodeString class is not suitable for subclassing.
 *
 * For an overview of Unicode strings in C and C++ see the
 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
 *
 * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
 * A Unicode character may be stored with either one code unit
 * (the most common case) or with a matched pair of special code units
 * ("surrogates"). The data type for code units is char16_t.
 * For single-character handling, a Unicode character code *point* is a value
 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
 *
 * Indexes and offsets into and lengths of strings always count code units, not code points.
 * This is the same as with multi-byte char* strings in traditional string handling.
 * Operations on partial strings typically do not test for code point boundaries.
 * If necessary, the user needs to take care of such boundaries by testing for the code unit
 * values or by using functions like
 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
 *
 * UnicodeString methods are more lenient with regard to input parameter values
 * than other ICU APIs. In particular:
 * - If indexes are out of bounds for a UnicodeString object
 *   (< 0 or > length()) then they are "pinned" to the nearest boundary.
 * - If the buffer passed to an insert/append/replace operation is owned by the
 *   target object, e.g., calling str.append(str), an extra copy may take place
 *   to ensure safety.
 * - If primitive string pointer values (e.g., const char16_t * or char *)
 *   for input strings are NULL, then those input string parameters are treated
 *   as if they pointed to an empty string.
 *   However, this is *not* the case for char * parameters for charset names
 *   or other IDs.
 * - Most UnicodeString methods do not take a UErrorCode parameter because
 *   there are usually very few opportunities for failure other than a shortage
 *   of memory, error codes in low-level C++ string methods would be inconvenient,
 *   and the error code as the last parameter (ICU convention) would prevent
 *   the use of default parameter values.
 *   Instead, such methods set the UnicodeString into a "bogus" state
 *   (see isBogus()) if an error occurs.
 *
 * In string comparisons, two UnicodeString objects that are both "bogus"
 * compare equal (to be transitive and prevent endless loops in sorting),
 * and a "bogus" string compares less than any non-"bogus" one.
 *
 * Const UnicodeString methods are thread-safe. Multiple threads can use
 * const methods on the same UnicodeString object simultaneously,
 * but non-const methods must not be called concurrently (in multiple threads)
 * with any other (const or non-const) methods.
 *
 * Similarly, const UnicodeString & parameters are thread-safe.
 * One object may be passed in as such a parameter concurrently in multiple threads.
 * This includes the const UnicodeString & parameters for
 * copy construction, assignment, and cloning.
 *
 * UnicodeString uses several storage methods.
 * String contents can be stored inside the UnicodeString object itself,
 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
 * Most of this is done transparently, but careful aliasing in particular provides
 * significant performance improvements.
 * Also, the internal buffer is accessible via special functions.
 * For details see the
 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
 *
 * @see utf.h
 * @see CharacterIterator
 * @stable ICU 2.0
 */
class U_COMMON_API UnicodeString : public Replaceable
{
public:

  /**
   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
   * which constructs a Unicode string from an invariant-character char * string.
   * Use the macro US_INV instead of the full qualification for this value.
   *
   * @see US_INV
   * @stable ICU 3.2
   */
  enum EInvariant {
    /**
     * @see EInvariant
     * @stable ICU 3.2
     */
    kInvariant
  };

  //========================================
  // Read-only operations
  //========================================

  /* Comparison - bitwise only - for international comparison use collation */

  /**
   * Equality operator. Performs only bitwise comparison.
   * @param text The UnicodeString to compare to this one.
   * @return true if `text` contains the same characters as this one,
   * false otherwise.
   * @stable ICU 2.0
   */
  inline bool operator== (const UnicodeString& text) const;

  /**
   * Inequality operator. Performs only bitwise comparison.
   * @param text The UnicodeString to compare to this one.
   * @return false if `text` contains the same characters as this one,
   * true otherwise.
   * @stable ICU 2.0
   */
  inline bool operator!= (const UnicodeString& text) const;

  /**
   * Greater than operator. Performs only bitwise comparison.
   * @param text The UnicodeString to compare to this one.
   * @return true if the characters in this are bitwise
   * greater than the characters in `text`, false otherwise
   * @stable ICU 2.0
   */
  inline UBool operator> (const UnicodeString& text) const;

  /**
   * Less than operator. Performs only bitwise comparison.
   * @param text The UnicodeString to compare to this one.
   * @return true if the characters in this are bitwise
   * less than the characters in `text`, false otherwise
   * @stable ICU 2.0
   */
  inline UBool operator< (const UnicodeString& text) const;

  /**
   * Greater than or equal operator. Performs only bitwise comparison.
   * @param text The UnicodeString to compare to this one.
   * @return true if the characters in this are bitwise
   * greater than or equal to the characters in `text`, false otherwise
   * @stable ICU 2.0
   */
  inline UBool operator>= (const UnicodeString& text) const;

  /**
   * Less than or equal operator. Performs only bitwise comparison.
   * @param text The UnicodeString to compare to this one.
   * @return true if the characters in this are bitwise
   * less than or equal to the characters in `text`, false otherwise
   * @stable ICU 2.0
   */
  inline UBool operator<= (const UnicodeString& text) const;

  /**
   * Compare the characters bitwise in this UnicodeString to
   * the characters in `text`.
   * @param text The UnicodeString to compare to this one.
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `text`, -1 if the characters in
   * this are bitwise less than the characters in `text`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `text`.
   * @stable ICU 2.0
   */
  inline int8_t compare(const UnicodeString& text) const;

  /**
   * Compare the characters bitwise in the range
   * [`start`, `start + length`) with the characters
   * in the **entire string** `text`.
   * (The parameters "start" and "length" are not applied to the other text "text".)
   * @param start the offset at which the compare operation begins
   * @param length the number of characters of text to compare.
   * @param text the other text to be compared against this string.
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `text`, -1 if the characters in
   * this are bitwise less than the characters in `text`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `text`.
   * @stable ICU 2.0
   */
  inline int8_t compare(int32_t start,
         int32_t length,
         const UnicodeString& text) const;

  /**
   * Compare the characters bitwise in the range
   * [`start`, `start + length`) with the characters
   * in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`).
   * @param start the offset at which the compare operation begins
   * @param length the number of characters in this to compare.
   * @param srcText the text to be compared
   * @param srcStart the offset into `srcText` to start comparison
   * @param srcLength the number of characters in `src` to compare
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `srcText`, -1 if the characters in
   * this are bitwise less than the characters in `srcText`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `srcText`.
   * @stable ICU 2.0
   */
   inline int8_t compare(int32_t start,
         int32_t length,
         const UnicodeString& srcText,
         int32_t srcStart,
         int32_t srcLength) const;

  /**
   * Compare the characters bitwise in this UnicodeString with the first
   * `srcLength` characters in `srcChars`.
   * @param srcChars The characters to compare to this UnicodeString.
   * @param srcLength the number of characters in `srcChars` to compare
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `srcChars`, -1 if the characters in
   * this are bitwise less than the characters in `srcChars`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `srcChars`.
   * @stable ICU 2.0
   */
  inline int8_t compare(ConstChar16Ptr srcChars,
         int32_t srcLength) const;

  /**
   * Compare the characters bitwise in the range
   * [`start`, `start + length`) with the first
   * `length` characters in `srcChars`
   * @param start the offset at which the compare operation begins
   * @param length the number of characters to compare.
   * @param srcChars the characters to be compared
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `srcChars`, -1 if the characters in
   * this are bitwise less than the characters in `srcChars`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `srcChars`.
   * @stable ICU 2.0
   */
  inline int8_t compare(int32_t start,
         int32_t length,
         const char16_t *srcChars) const;

  /**
   * Compare the characters bitwise in the range
   * [`start`, `start + length`) with the characters
   * in `srcChars` in the range
   * [`srcStart`, `srcStart + srcLength`).
   * @param start the offset at which the compare operation begins
   * @param length the number of characters in this to compare
   * @param srcChars the characters to be compared
   * @param srcStart the offset into `srcChars` to start comparison
   * @param srcLength the number of characters in `srcChars` to compare
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `srcChars`, -1 if the characters in
   * this are bitwise less than the characters in `srcChars`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `srcChars`.
   * @stable ICU 2.0
   */
  inline int8_t compare(int32_t start,
         int32_t length,
         const char16_t *srcChars,
         int32_t srcStart,
         int32_t srcLength) const;

  /**
   * Compare the characters bitwise in the range
   * [`start`, `limit`) with the characters
   * in `srcText` in the range
   * [`srcStart`, `srcLimit`).
   * @param start the offset at which the compare operation begins
   * @param limit the offset immediately following the compare operation
   * @param srcText the text to be compared
   * @param srcStart the offset into `srcText` to start comparison
   * @param srcLimit the offset into `srcText` to limit comparison
   * @return The result of bitwise character comparison: 0 if this
   * contains the same characters as `srcText`, -1 if the characters in
   * this are bitwise less than the characters in `srcText`, +1 if the
   * characters in this are bitwise greater than the characters
   * in `srcText`.
   * @stable ICU 2.0
   */
  inline int8_t compareBetween(int32_t start,
            int32_t limit,
            const UnicodeString& srcText,
            int32_t srcStart,
            int32_t srcLimit) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param text Another string to compare this one to.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
  inline int8_t compareCodePointOrder(const UnicodeString& text) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcText Another string to compare this one to.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
  inline int8_t compareCodePointOrder(int32_t start,
                                      int32_t length,
                                      const UnicodeString& srcText) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcText Another string to compare this one to.
   * @param srcStart The start offset in that string at which the compare operation begins.
   * @param srcLength The number of code units from that string to compare.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
   inline int8_t compareCodePointOrder(int32_t start,
                                       int32_t length,
                                       const UnicodeString& srcText,
                                       int32_t srcStart,
                                       int32_t srcLength) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param srcChars A pointer to another string to compare this one to.
   * @param srcLength The number of code units from that string to compare.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
  inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
                                      int32_t srcLength) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcChars A pointer to another string to compare this one to.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
  inline int8_t compareCodePointOrder(int32_t start,
                                      int32_t length,
                                      const char16_t *srcChars) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcChars A pointer to another string to compare this one to.
   * @param srcStart The start offset in that string at which the compare operation begins.
   * @param srcLength The number of code units from that string to compare.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
  inline int8_t compareCodePointOrder(int32_t start,
                                      int32_t length,
                                      const char16_t *srcChars,
                                      int32_t srcStart,
                                      int32_t srcLength) const;

  /**
   * Compare two Unicode strings in code point order.
   * The result may be different from the results of compare(), operator<, etc.
   * if supplementary characters are present:
   *
   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   * which means that they compare as less than some other BMP characters like U+feff.
   * This function compares Unicode strings in code point order.
   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param limit The offset after the last code unit from this string to compare.
   * @param srcText Another string to compare this one to.
   * @param srcStart The start offset in that string at which the compare operation begins.
   * @param srcLimit The offset after the last code unit from that string to compare.
   * @return a negative/zero/positive integer corresponding to whether
   * this string is less than/equal to/greater than the second one
   * in code point order
   * @stable ICU 2.0
   */
  inline int8_t compareCodePointOrderBetween(int32_t start,
                                             int32_t limit,
                                             const UnicodeString& srcText,
                                             int32_t srcStart,
                                             int32_t srcLimit) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
   *
   * @param text Another string to compare this one to.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcText Another string to compare this one to.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompare(int32_t start,
         int32_t length,
         const UnicodeString& srcText,
         uint32_t options) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcText Another string to compare this one to.
   * @param srcStart The start offset in that string at which the compare operation begins.
   * @param srcLength The number of code units from that string to compare.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompare(int32_t start,
         int32_t length,
         const UnicodeString& srcText,
         int32_t srcStart,
         int32_t srcLength,
         uint32_t options) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   *
   * @param srcChars A pointer to another string to compare this one to.
   * @param srcLength The number of code units from that string to compare.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompare(ConstChar16Ptr srcChars,
         int32_t srcLength,
         uint32_t options) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcChars A pointer to another string to compare this one to.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompare(int32_t start,
         int32_t length,
         const char16_t *srcChars,
         uint32_t options) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param length The number of code units from this string to compare.
   * @param srcChars A pointer to another string to compare this one to.
   * @param srcStart The start offset in that string at which the compare operation begins.
   * @param srcLength The number of code units from that string to compare.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompare(int32_t start,
         int32_t length,
         const char16_t *srcChars,
         int32_t srcStart,
         int32_t srcLength,
         uint32_t options) const;

  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
   *
   * @param start The start offset in this string at which the compare operation begins.
   * @param limit The offset after the last code unit from this string to compare.
   * @param srcText Another string to compare this one to.
   * @param srcStart The start offset in that string at which the compare operation begins.
   * @param srcLimit The offset after the last code unit from that string to compare.
   * @param options A bit set of options:
   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   *     Comparison in code unit order with default case folding.
   *
   *   - U_COMPARE_CODE_POINT_ORDER
   *     Set to choose code point order instead of code unit order
   *     (see u_strCompare for details).
   *
   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   *
   * @return A negative, zero, or positive integer indicating the comparison result.
   * @stable ICU 2.0
   */
  inline int8_t caseCompareBetween(int32_t start,
            int32_t limit,
            const UnicodeString& srcText,
            int32_t srcStart,
            int32_t srcLimit,
            uint32_t options) const;

  /**
   * Determine if this starts with the characters in `text`
   * @param text The text to match.
   * @return true if this starts with the characters in `text`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool startsWith(const UnicodeString& text) const;

  /**
   * Determine if this starts with the characters in `srcText`
   * in the range [`srcStart`, `srcStart + srcLength`).
   * @param srcText The text to match.
   * @param srcStart the offset into `srcText` to start matching
   * @param srcLength the number of characters in `srcText` to match
   * @return true if this starts with the characters in `text`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool startsWith(const UnicodeString& srcText,
            int32_t srcStart,
            int32_t srcLength) const;

  /**
   * Determine if this starts with the characters in `srcChars`
   * @param srcChars The characters to match.
   * @param srcLength the number of characters in `srcChars`
   * @return true if this starts with the characters in `srcChars`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool startsWith(ConstChar16Ptr srcChars,
            int32_t srcLength) const;

  /**
   * Determine if this ends with the characters in `srcChars`
   * in the range  [`srcStart`, `srcStart + srcLength`).
   * @param srcChars The characters to match.
   * @param srcStart the offset into `srcText` to start matching
   * @param srcLength the number of characters in `srcChars` to match
   * @return true if this ends with the characters in `srcChars`, false otherwise
   * @stable ICU 2.0
   */
  inline UBool startsWith(const char16_t *srcChars,
            int32_t srcStart,
            int32_t srcLength) const;

  /**
   * Determine if this ends with the characters in `text`
   * @param text The text to match.
   * @return true if this ends with the characters in `text`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool endsWith(const UnicodeString& text) const;

  /**
   * Determine if this ends with the characters in `srcText`
   * in the range [`srcStart`, `srcStart + srcLength`).
   * @param srcText The text to match.
   * @param srcStart the offset into `srcText` to start matching
   * @param srcLength the number of characters in `srcText` to match
   * @return true if this ends with the characters in `text`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool endsWith(const UnicodeString& srcText,
          int32_t srcStart,
          int32_t srcLength) const;

  /**
   * Determine if this ends with the characters in `srcChars`
   * @param srcChars The characters to match.
   * @param srcLength the number of characters in `srcChars`
   * @return true if this ends with the characters in `srcChars`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool endsWith(ConstChar16Ptr srcChars,
          int32_t srcLength) const;

  /**
   * Determine if this ends with the characters in `srcChars`
   * in the range  [`srcStart`, `srcStart + srcLength`).
   * @param srcChars The characters to match.
   * @param srcStart the offset into `srcText` to start matching
   * @param srcLength the number of characters in `srcChars` to match
   * @return true if this ends with the characters in `srcChars`,
   * false otherwise
   * @stable ICU 2.0
   */
  inline UBool endsWith(const char16_t *srcChars,
          int32_t srcStart,
          int32_t srcLength) const;


  /* Searching - bitwise only */

  /**
   * Locate in this the first occurrence of the characters in `text`,
   * using bitwise comparison.
   * @param text The text to search for.
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(const UnicodeString& text) const;

  /**
   * Locate in this the first occurrence of the characters in `text`
   * starting at offset `start`, using bitwise comparison.
   * @param text The text to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(const UnicodeString& text,
              int32_t start) const;

  /**
   * Locate in this the first occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `text`, using bitwise comparison.
   * @param text The text to search for.
   * @param start The offset at which searching will start.
   * @param length The number of characters to search
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(const UnicodeString& text,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the first occurrence in the range
   * [`start`, `start + length`) of the characters
   *  in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`),
   * using bitwise comparison.
   * @param srcText The text to search for.
   * @param srcStart the offset into `srcText` at which
   * to start matching
   * @param srcLength the number of characters in `srcText` to match
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the first occurrence of the characters in
   * `srcChars`
   * starting at offset `start`, using bitwise comparison.
   * @param srcChars The text to search for.
   * @param srcLength the number of characters in `srcChars` to match
   * @param start the offset into this at which to start matching
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(const char16_t *srcChars,
              int32_t srcLength,
              int32_t start) const;

  /**
   * Locate in this the first occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `srcChars`, using bitwise comparison.
   * @param srcChars The text to search for.
   * @param srcLength the number of characters in `srcChars`
   * @param start The offset at which searching will start.
   * @param length The number of characters to search
   * @return The offset into this of the start of `srcChars`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(ConstChar16Ptr srcChars,
              int32_t srcLength,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the first occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `srcChars` in the range
   * [`srcStart`, `srcStart + srcLength`),
   * using bitwise comparison.
   * @param srcChars The text to search for.
   * @param srcStart the offset into `srcChars` at which
   * to start matching
   * @param srcLength the number of characters in `srcChars` to match
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  int32_t indexOf(const char16_t *srcChars,
              int32_t srcStart,
              int32_t srcLength,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the first occurrence of the BMP code point `c`,
   * using bitwise comparison.
   * @param c The code unit to search for.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(char16_t c) const;

  /**
   * Locate in this the first occurrence of the code point `c`,
   * using bitwise comparison.
   *
   * @param c The code point to search for.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(UChar32 c) const;

  /**
   * Locate in this the first occurrence of the BMP code point `c`,
   * starting at offset `start`, using bitwise comparison.
   * @param c The code unit to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(char16_t c,
              int32_t start) const;

  /**
   * Locate in this the first occurrence of the code point `c`
   * starting at offset `start`, using bitwise comparison.
   *
   * @param c The code point to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(UChar32 c,
              int32_t start) const;

  /**
   * Locate in this the first occurrence of the BMP code point `c`
   * in the range [`start`, `start + length`),
   * using bitwise comparison.
   * @param c The code unit to search for.
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(char16_t c,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the first occurrence of the code point `c`
   * in the range [`start`, `start + length`),
   * using bitwise comparison.
   *
   * @param c The code point to search for.
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t indexOf(UChar32 c,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the last occurrence of the characters in `text`,
   * using bitwise comparison.
   * @param text The text to search for.
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(const UnicodeString& text) const;

  /**
   * Locate in this the last occurrence of the characters in `text`
   * starting at offset `start`, using bitwise comparison.
   * @param text The text to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(const UnicodeString& text,
              int32_t start) const;

  /**
   * Locate in this the last occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `text`, using bitwise comparison.
   * @param text The text to search for.
   * @param start The offset at which searching will start.
   * @param length The number of characters to search
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(const UnicodeString& text,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the last occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`),
   * using bitwise comparison.
   * @param srcText The text to search for.
   * @param srcStart the offset into `srcText` at which
   * to start matching
   * @param srcLength the number of characters in `srcText` to match
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the last occurrence of the characters in `srcChars`
   * starting at offset `start`, using bitwise comparison.
   * @param srcChars The text to search for.
   * @param srcLength the number of characters in `srcChars` to match
   * @param start the offset into this at which to start matching
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(const char16_t *srcChars,
              int32_t srcLength,
              int32_t start) const;

  /**
   * Locate in this the last occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `srcChars`, using bitwise comparison.
   * @param srcChars The text to search for.
   * @param srcLength the number of characters in `srcChars`
   * @param start The offset at which searching will start.
   * @param length The number of characters to search
   * @return The offset into this of the start of `srcChars`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
              int32_t srcLength,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the last occurrence in the range
   * [`start`, `start + length`) of the characters
   * in `srcChars` in the range
   * [`srcStart`, `srcStart + srcLength`),
   * using bitwise comparison.
   * @param srcChars The text to search for.
   * @param srcStart the offset into `srcChars` at which
   * to start matching
   * @param srcLength the number of characters in `srcChars` to match
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of the start of `text`,
   * or -1 if not found.
   * @stable ICU 2.0
   */
  int32_t lastIndexOf(const char16_t *srcChars,
              int32_t srcStart,
              int32_t srcLength,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the last occurrence of the BMP code point `c`,
   * using bitwise comparison.
   * @param c The code unit to search for.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(char16_t c) const;

  /**
   * Locate in this the last occurrence of the code point `c`,
   * using bitwise comparison.
   *
   * @param c The code point to search for.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(UChar32 c) const;

  /**
   * Locate in this the last occurrence of the BMP code point `c`
   * starting at offset `start`, using bitwise comparison.
   * @param c The code unit to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(char16_t c,
              int32_t start) const;

  /**
   * Locate in this the last occurrence of the code point `c`
   * starting at offset `start`, using bitwise comparison.
   *
   * @param c The code point to search for.
   * @param start The offset at which searching will start.
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(UChar32 c,
              int32_t start) const;

  /**
   * Locate in this the last occurrence of the BMP code point `c`
   * in the range [`start`, `start + length`),
   * using bitwise comparison.
   * @param c The code unit to search for.
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(char16_t c,
              int32_t start,
              int32_t length) const;

  /**
   * Locate in this the last occurrence of the code point `c`
   * in the range [`start`, `start + length`),
   * using bitwise comparison.
   *
   * @param c The code point to search for.
   * @param start the offset into this at which to start matching
   * @param length the number of characters in this to search
   * @return The offset into this of `c`, or -1 if not found.
   * @stable ICU 2.0
   */
  inline int32_t lastIndexOf(UChar32 c,
              int32_t start,
              int32_t length) const;


  /* Character access */

  /**
   * Return the code unit at offset `offset`.
   * If the offset is not valid (0..length()-1) then U+ffff is returned.
   * @param offset a valid offset into the text
   * @return the code unit at offset `offset`
   *         or 0xffff if the offset is not valid for this string
   * @stable ICU 2.0
   */
  inline char16_t charAt(int32_t offset) const;

  /**
   * Return the code unit at offset `offset`.
   * If the offset is not valid (0..length()-1) then U+ffff is returned.
   * @param offset a valid offset into the text
   * @return the code unit at offset `offset`
   * @stable ICU 2.0
   */
  inline char16_t operator[] (int32_t offset) const;

  /**
   * Return the code point that contains the code unit
   * at offset `offset`.
   * If the offset is not valid (0..length()-1) then U+ffff is returned.
   * @param offset a valid offset into the text
   * that indicates the text offset of any of the code units
   * that will be assembled into a code point (21-bit value) and returned
   * @return the code point of text at `offset`
   *         or 0xffff if the offset is not valid for this string
   * @stable ICU 2.0
   */
  UChar32 char32At(int32_t offset) const;

  /**
   * Adjust a random-access offset so that
   * it points to the beginning of a Unicode character.
   * The offset that is passed in points to
   * any code unit of a code point,
   * while the returned offset will point to the first code unit
   * of the same code point.
   * In UTF-16, if the input offset points to a second surrogate
   * of a surrogate pair, then the returned offset will point
   * to the first surrogate.
   * @param offset a valid offset into one code point of the text
   * @return offset of the first code unit of the same code point
   * @see U16_SET_CP_START
   * @stable ICU 2.0
   */
  int32_t getChar32Start(int32_t offset) const;

  /**
   * Adjust a random-access offset so that
   * it points behind a Unicode character.
   * The offset that is passed in points behind
   * any code unit of a code point,
   * while the returned offset will point behind the last code unit
   * of the same code point.
   * In UTF-16, if the input offset points behind the first surrogate
   * (i.e., to the second surrogate)
   * of a surrogate pair, then the returned offset will point
   * behind the second surrogate (i.e., to the first surrogate).
   * @param offset a valid offset after any code unit of a code point of the text
   * @return offset of the first code unit after the same code point
   * @see U16_SET_CP_LIMIT
   * @stable ICU 2.0
   */
  int32_t getChar32Limit(int32_t offset) const;

  /**
   * Move the code unit index along the string by delta code points.
   * Interpret the input index as a code unit-based offset into the string,
   * move the index forward or backward by delta code points, and
   * return the resulting index.
   * The input index should point to the first code unit of a code point,
   * if there is more than one.
   *
   * Both input and output indexes are code unit-based as for all
   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
   * If delta<0 then the index is moved backward (toward the start of the string).
   * If delta>0 then the index is moved forward (toward the end of the string).
   *
   * This behaves like CharacterIterator::move32(delta, kCurrent).
   *
   * Behavior for out-of-bounds indexes:
   * `moveIndex32` pins the input index to 0..length(), i.e.,
   * if the input index<0 then it is pinned to 0;
   * if it is index>length() then it is pinned to length().
   * Afterwards, the index is moved by `delta` code points
   * forward or backward,
   * but no further backward than to 0 and no further forward than to length().
   * The resulting index return value will be in between 0 and length(), inclusively.
   *
   * Examples:
   * \code
   *     // s has code points 'a' U+10000 'b' U+10ffff U+2029
   *     UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
   *
   *     // initial index: position of U+10000
   *     int32_t index=1;
   *
   *     // the following examples will all result in index==4, position of U+10ffff
   *
   *     // skip 2 code points from some position in the string
   *     index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
   *
   *     // go to the 3rd code point from the start of s (0-based)
   *     index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
   *
   *     // go to the next-to-last code point of s
   *     index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
   * \endcode
   *
   * @param index input code unit index
   * @param delta (signed) code point count to move the index forward or backward
   *        in the string
   * @return the resulting code unit index
   * @stable ICU 2.0
   */
  int32_t moveIndex32(int32_t index, int32_t delta) const;

  /* Substring extraction */

  /**
   * Copy the characters in the range
   * [`start`, `start + length`) into the array `dst`,
   * beginning at `dstStart`.
   * If the string aliases to `dst` itself as an external buffer,
   * then extract() will not copy the contents.
   *
   * @param start offset of first character which will be copied into the array
   * @param length the number of characters to extract
   * @param dst array in which to copy characters.  The length of `dst`
   * must be at least (`dstStart + length`).
   * @param dstStart the offset in `dst` where the first character
   * will be extracted
   * @stable ICU 2.0
   */
  inline void extract(int32_t start,
           int32_t length,
           Char16Ptr dst,
           int32_t dstStart = 0) const;

  /**
   * Copy the contents of the string into dest.
   * This is a convenience function that
   * checks if there is enough space in dest,
   * extracts the entire string if possible,
   * and NUL-terminates dest if possible.
   *
   * If the string fits into dest but cannot be NUL-terminated
   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
   * If the string itself does not fit into dest
   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
   *
   * If the string aliases to `dest` itself as an external buffer,
   * then extract() will not copy the contents.
   *
   * @param dest Destination string buffer.
   * @param destCapacity Number of char16_ts available at dest.
   * @param errorCode ICU error code.
   * @return length()
   * @stable ICU 2.0
   */
  int32_t
  extract(Char16Ptr dest, int32_t destCapacity,
          UErrorCode &errorCode) const;

  /**
   * Copy the characters in the range
   * [`start`, `start + length`) into the  UnicodeString
   * `target`.
   * @param start offset of first character which will be copied
   * @param length the number of characters to extract
   * @param target UnicodeString into which to copy characters.
   * @stable ICU 2.0
   */
  inline void extract(int32_t start,
           int32_t length,
           UnicodeString& target) const;

  /**
   * Copy the characters in the range [`start`, `limit`)
   * into the array `dst`, beginning at `dstStart`.
   * @param start offset of first character which will be copied into the array
   * @param limit offset immediately following the last character to be copied
   * @param dst array in which to copy characters.  The length of `dst`
   * must be at least (`dstStart + (limit - start)`).
   * @param dstStart the offset in `dst` where the first character
   * will be extracted
   * @stable ICU 2.0
   */
  inline void extractBetween(int32_t start,
              int32_t limit,
              char16_t *dst,
              int32_t dstStart = 0) const;

  /**
   * Copy the characters in the range [`start`, `limit`)
   * into the UnicodeString `target`.  Replaceable API.
   * @param start offset of first character which will be copied
   * @param limit offset immediately following the last character to be copied
   * @param target UnicodeString into which to copy characters.
   * @stable ICU 2.0
   */
  virtual void extractBetween(int32_t start,
              int32_t limit,
              UnicodeString& target) const override;

  /**
   * Copy the characters in the range
   * [`start`, `start + startLength`) into an array of characters.
   * All characters must be invariant (see utypes.h).
   * Use US_INV as the last, signature-distinguishing parameter.
   *
   * This function does not write any more than `targetCapacity`
   * characters but returns the length of the entire output string
   * so that one can allocate a larger buffer and call the function again
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
   * @param start offset of first character which will be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction, can be NULL
   *               if targetLength is 0
   * @param targetCapacity the length of the target buffer
   * @param inv Signature-distinguishing parameter, use US_INV.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 3.2
   */
  int32_t extract(int32_t start,
           int32_t startLength,
           char *target,
           int32_t targetCapacity,
           enum EInvariant inv) const;

#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

  /**
   * Copy the characters in the range
   * [`start`, `start + length`) into an array of characters
   * in the platform's default codepage.
   * This function does not write any more than `targetLength`
   * characters but returns the length of the entire output string
   * so that one can allocate a larger buffer and call the function again
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
   * @param start offset of first character which will be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
   * @param targetLength the length of the target buffer
   * If `target` is NULL, then the number of bytes required for
   * `target` is returned.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 2.0
   */
  int32_t extract(int32_t start,
           int32_t startLength,
           char *target,
           uint32_t targetLength) const;

#endif

#if !UCONFIG_NO_CONVERSION

  /**
   * Copy the characters in the range
   * [`start`, `start + length`) into an array of characters
   * in a specified codepage.
   * The output string is NUL-terminated.
   *
   * Recommendation: For invariant-character strings use
   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
   *
   * @param start offset of first character which will be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
   * @param codepage the desired codepage for the characters.  0 has
   * the special meaning of the default codepage
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * If `target` is NULL, then the number of bytes required for
   * `target` is returned. It is assumed that the target is big enough
   * to fit all of the characters.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 2.0
   */
  inline int32_t extract(int32_t start,
                 int32_t startLength,
                 char *target,
                 const char *codepage = 0) const;

  /**
   * Copy the characters in the range
   * [`start`, `start + length`) into an array of characters
   * in a specified codepage.
   * This function does not write any more than `targetLength`
   * characters but returns the length of the entire output string
   * so that one can allocate a larger buffer and call the function again
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
   * Recommendation: For invariant-character strings use
   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
   *
   * @param start offset of first character which will be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
   * @param targetLength the length of the target buffer
   * @param codepage the desired codepage for the characters.  0 has
   * the special meaning of the default codepage
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * If `target` is NULL, then the number of bytes required for
   * `target` is returned.
   * @return the output string length, not including the terminating NUL
   * @stable ICU 2.0
   */
  int32_t extract(int32_t start,
           int32_t startLength,
           char *target,
           uint32_t targetLength,
           const char *codepage) const;

  /**
   * Convert the UnicodeString into a codepage string using an existing UConverter.
   * The output string is NUL-terminated if possible.
   *
   * This function avoids the overhead of opening and closing a converter if
   * multiple strings are extracted.
   *
   * @param dest destination string buffer, can be NULL if destCapacity==0
   * @param destCapacity the number of chars available at dest
   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
   *        or NULL for the default converter
   * @param errorCode normal ICU error code
   * @return the length of the output string, not counting the terminating NUL;
   *         if the length is greater than destCapacity, then the string will not fit
   *         and a buffer of the indicated length would need to be passed in
   * @stable ICU 2.0
   */
  int32_t extract(char *dest, int32_t destCapacity,
                  UConverter *cnv,
                  UErrorCode &errorCode) const;

#endif

  /**
   * Create a temporary substring for the specified range.
   * Unlike the substring constructor and setTo() functions,
   * the object returned here will be a read-only alias (using getBuffer())
   * rather than copying the text.
   * As a result, this substring operation is much faster but requires
   * that the original string not be modified or deleted during the lifetime
   * of the returned substring object.
   * @param start offset of the first character visible in the substring
   * @param length length of the substring
   * @return a read-only alias UnicodeString object for the substring
   * @stable ICU 4.4
   */
  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;

  /**
   * Create a temporary substring for the specified range.
   * Same as tempSubString(start, length) except that the substring range
   * is specified as a (start, limit) pair (with an exclusive limit index)
   * rather than a (start, length) pair.
   * @param start offset of the first character visible in the substring
   * @param limit offset immediately following the last character visible in the substring
   * @return a read-only alias UnicodeString object for the substring
   * @stable ICU 4.4
   */
  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;

  /**
   * Convert the UnicodeString to UTF-8 and write the result
   * to a ByteSink. This is called by toUTF8String().
   * Unpaired surrogates are replaced with U+FFFD.
   * Calls u_strToUTF8WithSub().
   *
   * @param sink A ByteSink to which the UTF-8 version of the string is written.
   *             sink.Flush() is called at the end.
   * @stable ICU 4.2
   * @see toUTF8String
   */
  void toUTF8(ByteSink &sink) const;

  /**
   * Convert the UnicodeString to UTF-8 and append the result
   * to a standard string.
   * Unpaired surrogates are replaced with U+FFFD.
   * Calls toUTF8().
   *
   * @param result A standard string (or a compatible object)
   *        to which the UTF-8 version of the string is appended.
   * @return The string object.
   * @stable ICU 4.2
   * @see toUTF8
   */
  template<typename StringClass>
  StringClass &toUTF8String(StringClass &result) const {
    StringByteSink<StringClass> sbs(&result, length());
    toUTF8(sbs);
    return result;
  }

  /**
   * Convert the UnicodeString to UTF-32.
   * Unpaired surrogates are replaced with U+FFFD.
   * Calls u_strToUTF32WithSub().
   *
   * @param utf32 destination string buffer, can be NULL if capacity==0
   * @param capacity the number of UChar32s available at utf32
   * @param errorCode Standard ICU error code. Its input value must
   *                  pass the U_SUCCESS() test, or else the function returns
   *                  immediately. Check for U_FAILURE() on output or use with
   *                  function chaining. (See User Guide for details.)
   * @return The length of the UTF-32 string.
   * @see fromUTF32
   * @stable ICU 4.2
   */
  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;

  /* Length operations */

  /**
   * Return the length of the UnicodeString object.
   * The length is the number of char16_t code units are in the UnicodeString.
   * If you want the number of code points, please use countChar32().
   * @return the length of the UnicodeString object
   * @see countChar32
   * @stable ICU 2.0
   */
  inline int32_t length(void) const;

  /**
   * Count Unicode code points in the length char16_t code units of the string.
   * A code point may occupy either one or two char16_t code units.
   * Counting code points involves reading all code units.
   *
   * This functions is basically the inverse of moveIndex32().
   *
   * @param start the index of the first code unit to check
   * @param length the number of char16_t code units to check
   * @return the number of code points in the specified code units
   * @see length
   * @stable ICU 2.0
   */
  int32_t
  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;

  /**
   * Check if the length char16_t code units of the string
   * contain more Unicode code points than a certain number.
   * This is more efficient than counting all code points in this part of the string
   * and comparing that number with a threshold.
   * This function may not need to scan the string at all if the length
   * falls within a certain range, and
   * never needs to count more than 'number+1' code points.
   * Logically equivalent to (countChar32(start, length)>number).
   * A Unicode code point may occupy either one or two char16_t code units.
   *
   * @param start the index of the first code unit to check (0 for the entire string)
   * @param length the number of char16_t code units to check
   *               (use INT32_MAX for the entire string; remember that start/length
   *                values are pinned)
   * @param number The number of code points in the (sub)string is compared against
   *               the 'number' parameter.
   * @return Boolean value for whether the string contains more Unicode code points
   *         than 'number'. Same as (u_countChar32(s, length)>number).
   * @see countChar32
   * @see u_strHasMoreChar32Than
   * @stable ICU 2.4
   */
  UBool
  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;

  /**
   * Determine if this string is empty.
   * @return true if this string contains 0 characters, false otherwise.
   * @stable ICU 2.0
   */
  inline UBool isEmpty(void) const;

  /**
   * Return the capacity of the internal buffer of the UnicodeString object.
   * This is useful together with the getBuffer functions.
   * See there for details.
   *
   * @return the number of char16_ts available in the internal buffer
   * @see getBuffer
   * @stable ICU 2.0
   */
  inline int32_t getCapacity(void) const;

  /* Other operations */

  /**
   * Generate a hash code for this object.
   * @return The hash code of this UnicodeString.
   * @stable ICU 2.0
   */
  inline int32_t hashCode(void) const;

  /**
   * Determine if this object contains a valid string.
   * A bogus string has no value. It is different from an empty string,
   * although in both cases isEmpty() returns true and length() returns 0.
   * setToBogus() and isBogus() can be used to indicate that no string value is available.
   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
   * length() returns 0.
   *
   * @return true if the string is bogus/invalid, false otherwise
   * @see setToBogus()
   * @stable ICU 2.0
   */
  inline UBool isBogus(void) const;


  //========================================
  // Write operations
  //========================================

  /* Assignment operations */

  /**
   * Assignment operator.  Replace the characters in this UnicodeString
   * with the characters from `srcText`.
   *
   * Starting with ICU 2.4, the assignment operator and the copy constructor
   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   * By contrast, the fastCopyFrom() function implements the old,
   * more efficient but less safe behavior
   * of making this string also a readonly alias to the same buffer.
   *
   * If the source object has an "open" buffer from getBuffer(minCapacity),
   * then the copy is an empty string.
   *
   * @param srcText The text containing the characters to replace
   * @return a reference to this
   * @stable ICU 2.0
   * @see fastCopyFrom
   */
  UnicodeString &operator=(const UnicodeString &srcText);

  /**
   * Almost the same as the assignment operator.
   * Replace the characters in this UnicodeString
   * with the characters from `srcText`.
   *
   * This function works the same as the assignment operator
   * for all strings except for ones that are readonly aliases.
   *
   * Starting with ICU 2.4, the assignment operator and the copy constructor
   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   * This function implements the old, more efficient but less safe behavior
   * of making this string also a readonly alias to the same buffer.
   *
   * The fastCopyFrom function must be used only if it is known that the lifetime of
   * this UnicodeString does not exceed the lifetime of the aliased buffer
   * including its contents, for example for strings from resource bundles
   * or aliases to string constants.
   *
   * If the source object has an "open" buffer from getBuffer(minCapacity),
   * then the copy is an empty string.
   *
   * @param src The text containing the characters to replace.
   * @return a reference to this
   * @stable ICU 2.4
   */
  UnicodeString &fastCopyFrom(const UnicodeString &src);

  /**
   * Move assignment operator; might leave src in bogus state.
   * This string will have the same contents and state that the source string had.
   * The behavior is undefined if *this and src are the same object.
   * @param src source string
   * @return *this
   * @stable ICU 56
   */
  UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT;

  /**
   * Swap strings.
   * @param other other string
   * @stable ICU 56
   */
  void swap(UnicodeString &other) U_NOEXCEPT;

  /**
   * Non-member UnicodeString swap function.
   * @param s1 will get s2's contents and state
   * @param s2 will get s1's contents and state
   * @stable ICU 56
   */
  friend inline void U_EXPORT2
  swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
    s1.swap(s2);
  }

  /**
   * Assignment operator.  Replace the characters in this UnicodeString
   * with the code unit `ch`.
   * @param ch the code unit to replace
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& operator= (char16_t ch);

  /**
   * Assignment operator.  Replace the characters in this UnicodeString
   * with the code point `ch`.
   * @param ch the code point to replace
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& operator= (UChar32 ch);

  /**
   * Set the text in the UnicodeString object to the characters
   * in `srcText` in the range
   * [`srcStart`, `srcText.length()`).
   * `srcText` is not modified.
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @return a reference to this
   * @stable ICU 2.2
   */
  inline UnicodeString& setTo(const UnicodeString& srcText,
               int32_t srcStart);

  /**
   * Set the text in the UnicodeString object to the characters
   * in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`).
   * `srcText` is not modified.
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcText` in the
   * replace string.
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength);

  /**
   * Set the text in the UnicodeString object to the characters in
   * `srcText`.
   * `srcText` is not modified.
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(const UnicodeString& srcText);

  /**
   * Set the characters in the UnicodeString object to the characters
   * in `srcChars`. `srcChars` is not modified.
   * @param srcChars the source for the new characters
   * @param srcLength the number of Unicode characters in srcChars.
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(const char16_t *srcChars,
               int32_t srcLength);

  /**
   * Set the characters in the UnicodeString object to the code unit
   * `srcChar`.
   * @param srcChar the code unit which becomes the UnicodeString's character
   * content
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(char16_t srcChar);

  /**
   * Set the characters in the UnicodeString object to the code point
   * `srcChar`.
   * @param srcChar the code point which becomes the UnicodeString's character
   * content
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& setTo(UChar32 srcChar);

  /**
   * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
   * The text will be used for the UnicodeString object, but
   * it will not be released when the UnicodeString is destroyed.
   * This has copy-on-write semantics:
   * When the string is modified, then the buffer is first copied into
   * newly allocated memory.
   * The aliased buffer is never modified.
   *
   * In an assignment to another UnicodeString, when using the copy constructor
   * or the assignment operator, the text will be copied.
   * When using fastCopyFrom(), the text will be aliased again,
   * so that both strings then alias the same readonly-text.
   *
   * @param isTerminated specifies if `text` is `NUL`-terminated.
   *                     This must be true if `textLength==-1`.
   * @param text The characters to alias for the UnicodeString.
   * @param textLength The number of Unicode characters in `text` to alias.
   *                   If -1, then this constructor will determine the length
   *                   by calling `u_strlen()`.
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString &setTo(UBool isTerminated,
                       ConstChar16Ptr text,
                       int32_t textLength);

  /**
   * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
   * The text will be used for the UnicodeString object, but
   * it will not be released when the UnicodeString is destroyed.
   * This has write-through semantics:
   * For as long as the capacity of the buffer is sufficient, write operations
   * will directly affect the buffer. When more capacity is necessary, then
   * a new buffer will be allocated and the contents copied as with regularly
   * constructed strings.
   * In an assignment to another UnicodeString, the buffer will be copied.
   * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
   * as the string buffer itself and will in this case not copy the contents.
   *
   * @param buffer The characters to alias for the UnicodeString.
   * @param buffLength The number of Unicode characters in `buffer` to alias.
   * @param buffCapacity The size of `buffer` in char16_ts.
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString &setTo(char16_t *buffer,
                       int32_t buffLength,
                       int32_t buffCapacity);

  /**
   * Make this UnicodeString object invalid.
   * The string will test true with isBogus().
   *
   * A bogus string has no value. It is different from an empty string.
   * It can be used to indicate that no string value is available.
   * getBuffer() and getTerminatedBuffer() return NULL, and
   * length() returns 0.
   *
   * This utility function is used throughout the UnicodeString
   * implementation to indicate that a UnicodeString operation failed,
   * and may be used in other functions,
   * especially but not exclusively when such functions do not
   * take a UErrorCode for simplicity.
   *
   * The following methods, and no others, will clear a string object's bogus flag:
   * - remove()
   * - remove(0, INT32_MAX)
   * - truncate(0)
   * - operator=() (assignment operator)
   * - setTo(...)
   *
   * The simplest ways to turn a bogus string into an empty one
   * is to use the remove() function.
   * Examples for other functions that are equivalent to "set to empty string":
   * \code
   * if(s.isBogus()) {
   *   s.remove();           // set to an empty string (remove all), or
   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
   *   s.truncate(0);        // set to an empty string (complete truncation), or
   *   s=UnicodeString();    // assign an empty string, or
   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
   *   s.setTo(u"", 0);      // set to an empty C Unicode string
   * }
   * \endcode
   *
   * @see isBogus()
   * @stable ICU 2.0
   */
  void setToBogus();

  /**
   * Set the character at the specified offset to the specified character.
   * @param offset A valid offset into the text of the character to set
   * @param ch The new character
   * @return A reference to this
   * @stable ICU 2.0
   */
  UnicodeString& setCharAt(int32_t offset,
               char16_t ch);


  /* Append operations */

  /**
   * Append operator. Append the code unit `ch` to the UnicodeString
   * object.
   * @param ch the code unit to be appended
   * @return a reference to this
   * @stable ICU 2.0
   */
 inline  UnicodeString& operator+= (char16_t ch);

  /**
   * Append operator. Append the code point `ch` to the UnicodeString
   * object.
   * @param ch the code point to be appended
   * @return a reference to this
   * @stable ICU 2.0
   */
 inline  UnicodeString& operator+= (UChar32 ch);

  /**
   * Append operator. Append the characters in `srcText` to the
   * UnicodeString object. `srcText` is not modified.
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& operator+= (const UnicodeString& srcText);

  /**
   * Append the characters
   * in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`) to the
   * UnicodeString object at offset `start`. `srcText`
   * is not modified.
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcText` in
   * the append string
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& append(const UnicodeString& srcText,
            int32_t srcStart,
            int32_t srcLength);

  /**
   * Append the characters in `srcText` to the UnicodeString object.
   * `srcText` is not modified.
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& append(const UnicodeString& srcText);

  /**
   * Append the characters in `srcChars` in the range
   * [`srcStart`, `srcStart + srcLength`) to the UnicodeString
   * object at offset
   * `start`. `srcChars` is not modified.
   * @param srcChars the source for the new characters
   * @param srcStart the offset into `srcChars` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcChars` in
   *                  the append string; can be -1 if `srcChars` is NUL-terminated
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& append(const char16_t *srcChars,
            int32_t srcStart,
            int32_t srcLength);

  /**
   * Append the characters in `srcChars` to the UnicodeString object
   * at offset `start`. `srcChars` is not modified.
   * @param srcChars the source for the new characters
   * @param srcLength the number of Unicode characters in `srcChars`;
   *                  can be -1 if `srcChars` is NUL-terminated
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& append(ConstChar16Ptr srcChars,
            int32_t srcLength);

  /**
   * Append the code unit `srcChar` to the UnicodeString object.
   * @param srcChar the code unit to append
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& append(char16_t srcChar);

  /**
   * Append the code point `srcChar` to the UnicodeString object.
   * @param srcChar the code point to append
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString& append(UChar32 srcChar);


  /* Insert operations */

  /**
   * Insert the characters in `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
   * object at offset `start`. `srcText` is not modified.
   * @param start the offset where the insertion begins
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcText` in
   * the insert string
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& insert(int32_t start,
            const UnicodeString& srcText,
            int32_t srcStart,
            int32_t srcLength);

  /**
   * Insert the characters in `srcText` into the UnicodeString object
   * at offset `start`. `srcText` is not modified.
   * @param start the offset where the insertion begins
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& insert(int32_t start,
            const UnicodeString& srcText);

  /**
   * Insert the characters in `srcChars` in the range
   * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
   *  object at offset `start`. `srcChars` is not modified.
   * @param start the offset at which the insertion begins
   * @param srcChars the source for the new characters
   * @param srcStart the offset into `srcChars` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcChars`
   * in the insert string
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& insert(int32_t start,
            const char16_t *srcChars,
            int32_t srcStart,
            int32_t srcLength);

  /**
   * Insert the characters in `srcChars` into the UnicodeString object
   * at offset `start`. `srcChars` is not modified.
   * @param start the offset where the insertion begins
   * @param srcChars the source for the new characters
   * @param srcLength the number of Unicode characters in srcChars.
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& insert(int32_t start,
            ConstChar16Ptr srcChars,
            int32_t srcLength);

  /**
   * Insert the code unit `srcChar` into the UnicodeString object at
   * offset `start`.
   * @param start the offset at which the insertion occurs
   * @param srcChar the code unit to insert
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& insert(int32_t start,
            char16_t srcChar);

  /**
   * Insert the code point `srcChar` into the UnicodeString object at
   * offset `start`.
   * @param start the offset at which the insertion occurs
   * @param srcChar the code point to insert
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& insert(int32_t start,
            UChar32 srcChar);


  /* Replace operations */

  /**
   * Replace the characters in the range
   * [`start`, `start + length`) with the characters in
   * `srcText` in the range
   * [`srcStart`, `srcStart + srcLength`).
   * `srcText` is not modified.
   * @param start the offset at which the replace operation begins
   * @param length the number of characters to replace. The character at
   * `start + length` is not modified.
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcText` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcText` in
   * the replace string
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replace(int32_t start,
             int32_t length,
             const UnicodeString& srcText,
             int32_t srcStart,
             int32_t srcLength);

  /**
   * Replace the characters in the range
   * [`start`, `start + length`)
   * with the characters in `srcText`.  `srcText` is
   *  not modified.
   * @param start the offset at which the replace operation begins
   * @param length the number of characters to replace. The character at
   * `start + length` is not modified.
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replace(int32_t start,
             int32_t length,
             const UnicodeString& srcText);

  /**
   * Replace the characters in the range
   * [`start`, `start + length`) with the characters in
   * `srcChars` in the range
   * [`srcStart`, `srcStart + srcLength`). `srcChars`
   * is not modified.
   * @param start the offset at which the replace operation begins
   * @param length the number of characters to replace.  The character at
   * `start + length` is not modified.
   * @param srcChars the source for the new characters
   * @param srcStart the offset into `srcChars` where new characters
   * will be obtained
   * @param srcLength the number of characters in `srcChars`
   * in the replace string
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replace(int32_t start,
             int32_t length,
             const char16_t *srcChars,
             int32_t srcStart,
             int32_t srcLength);

  /**
   * Replace the characters in the range
   * [`start`, `start + length`) with the characters in
   * `srcChars`.  `srcChars` is not modified.
   * @param start the offset at which the replace operation begins
   * @param length number of characters to replace.  The character at
   * `start + length` is not modified.
   * @param srcChars the source for the new characters
   * @param srcLength the number of Unicode characters in srcChars
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replace(int32_t start,
             int32_t length,
             ConstChar16Ptr srcChars,
             int32_t srcLength);

  /**
   * Replace the characters in the range
   * [`start`, `start + length`) with the code unit
   * `srcChar`.
   * @param start the offset at which the replace operation begins
   * @param length the number of characters to replace.  The character at
   * `start + length` is not modified.
   * @param srcChar the new code unit
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replace(int32_t start,
             int32_t length,
             char16_t srcChar);

  /**
   * Replace the characters in the range
   * [`start`, `start + length`) with the code point
   * `srcChar`.
   * @param start the offset at which the replace operation begins
   * @param length the number of characters to replace.  The character at
   * `start + length` is not modified.
   * @param srcChar the new code point
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);

  /**
   * Replace the characters in the range [`start`, `limit`)
   * with the characters in `srcText`. `srcText` is not modified.
   * @param start the offset at which the replace operation begins
   * @param limit the offset immediately following the replace range
   * @param srcText the source for the new characters
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replaceBetween(int32_t start,
                int32_t limit,
                const UnicodeString& srcText);

  /**
   * Replace the characters in the range [`start`, `limit`)
   * with the characters in `srcText` in the range
   * [`srcStart`, `srcLimit`). `srcText` is not modified.
   * @param start the offset at which the replace operation begins
   * @param limit the offset immediately following the replace range
   * @param srcText the source for the new characters
   * @param srcStart the offset into `srcChars` where new characters
   * will be obtained
   * @param srcLimit the offset immediately following the range to copy
   * in `srcText`
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& replaceBetween(int32_t start,
                int32_t limit,
                const UnicodeString& srcText,
                int32_t srcStart,
                int32_t srcLimit);

  /**
   * Replace a substring of this object with the given text.
   * @param start the beginning index, inclusive; `0 <= start <= limit`.
   * @param limit the ending index, exclusive; `start <= limit <= length()`.
   * @param text the text to replace characters `start` to `limit - 1`
   * @stable ICU 2.0
   */
  virtual void handleReplaceBetween(int32_t start,
                                    int32_t limit,
                                    const UnicodeString& text) override;

  /**
   * Replaceable API
   * @return true if it has MetaData
   * @stable ICU 2.4
   */
  virtual UBool hasMetaData() const override;

  /**
   * Copy a substring of this object, retaining attribute (out-of-band)
   * information.  This method is used to duplicate or reorder substrings.
   * The destination index must not overlap the source range.
   *
   * @param start the beginning index, inclusive; `0 <= start <= limit`.
   * @param limit the ending index, exclusive; `start <= limit <= length()`.
   * @param dest the destination index.  The characters from
   *             `start..limit-1` will be copied to `dest`.
   * Implementations of this method may assume that `dest <= start ||
   * dest >= limit`.
   * @stable ICU 2.0
   */
  virtual void copy(int32_t start, int32_t limit, int32_t dest) override;

  /* Search and replace operations */

  /**
   * Replace all occurrences of characters in oldText with the characters
   * in newText
   * @param oldText the text containing the search text
   * @param newText the text containing the replacement text
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
                const UnicodeString& newText);

  /**
   * Replace all occurrences of characters in oldText with characters
   * in newText
   * in the range [`start`, `start + length`).
   * @param start the start of the range in which replace will performed
   * @param length the length of the range in which replace will be performed
   * @param oldText the text containing the search text
   * @param newText the text containing the replacement text
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& findAndReplace(int32_t start,
                int32_t length,
                const UnicodeString& oldText,
                const UnicodeString& newText);

  /**
   * Replace all occurrences of characters in oldText in the range
   * [`oldStart`, `oldStart + oldLength`) with the characters
   * in newText in the range
   * [`newStart`, `newStart + newLength`)
   * in the range [`start`, `start + length`).
   * @param start the start of the range in which replace will performed
   * @param length the length of the range in which replace will be performed
   * @param oldText the text containing the search text
   * @param oldStart the start of the search range in `oldText`
   * @param oldLength the length of the search range in `oldText`
   * @param newText the text containing the replacement text
   * @param newStart the start of the replacement range in `newText`
   * @param newLength the length of the replacement range in `newText`
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString& findAndReplace(int32_t start,
                int32_t length,
                const UnicodeString& oldText,
                int32_t oldStart,
                int32_t oldLength,
                const UnicodeString& newText,
                int32_t newStart,
                int32_t newLength);


  /* Remove operations */

  /**
   * Removes all characters from the UnicodeString object and clears the bogus flag.
   * This is the UnicodeString equivalent of std::string’s clear().
   *
   * @return a reference to this
   * @see setToBogus
   * @stable ICU 2.0
   */
  inline UnicodeString& remove();

  /**
   * Remove the characters in the range
   * [`start`, `start + length`) from the UnicodeString object.
   * @param start the offset of the first character to remove
   * @param length the number of characters to remove
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& remove(int32_t start,
                               int32_t length = (int32_t)INT32_MAX);

  /**
   * Remove the characters in the range
   * [`start`, `limit`) from the UnicodeString object.
   * @param start the offset of the first character to remove
   * @param limit the offset immediately following the range to remove
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& removeBetween(int32_t start,
                                      int32_t limit = (int32_t)INT32_MAX);

  /**
   * Retain only the characters in the range
   * [`start`, `limit`) from the UnicodeString object.
   * Removes characters before `start` and at and after `limit`.
   * @param start the offset of the first character to retain
   * @param limit the offset immediately following the range to retain
   * @return a reference to this
   * @stable ICU 4.4
   */
  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);

  /* Length operations */

  /**
   * Pad the start of this UnicodeString with the character `padChar`.
   * If the length of this UnicodeString is less than targetLength,
   * length() - targetLength copies of padChar will be added to the
   * beginning of this UnicodeString.
   * @param targetLength the desired length of the string
   * @param padChar the character to use for padding. Defaults to
   * space (U+0020)
   * @return true if the text was padded, false otherwise.
   * @stable ICU 2.0
   */
  UBool padLeading(int32_t targetLength,
                    char16_t padChar = 0x0020);

  /**
   * Pad the end of this UnicodeString with the character `padChar`.
   * If the length of this UnicodeString is less than targetLength,
   * length() - targetLength copies of padChar will be added to the
   * end of this UnicodeString.
   * @param targetLength the desired length of the string
   * @param padChar the character to use for padding. Defaults to
   * space (U+0020)
   * @return true if the text was padded, false otherwise.
   * @stable ICU 2.0
   */
  UBool padTrailing(int32_t targetLength,
                     char16_t padChar = 0x0020);

  /**
   * Truncate this UnicodeString to the `targetLength`.
   * @param targetLength the desired length of this UnicodeString.
   * @return true if the text was truncated, false otherwise
   * @stable ICU 2.0
   */
  inline UBool truncate(int32_t targetLength);

  /**
   * Trims leading and trailing whitespace from this UnicodeString.
   * @return a reference to this
   * @stable ICU 2.0
   */
  UnicodeString& trim(void);


  /* Miscellaneous operations */

  /**
   * Reverse this UnicodeString in place.
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& reverse(void);

  /**
   * Reverse the range [`start`, `start + length`) in
   * this UnicodeString.
   * @param start the start of the range to reverse
   * @param length the number of characters to to reverse
   * @return a reference to this
   * @stable ICU 2.0
   */
  inline UnicodeString& reverse(int32_t start,
             int32_t length);

  /**
   * Convert the characters in this to UPPER CASE following the conventions of
   * the default locale.
   * @return A reference to this.
   * @stable ICU 2.0
   */
  UnicodeString& toUpper(void);

  /**
   * Convert the characters in this to UPPER CASE following the conventions of
   * a specific locale.
   * @param locale The locale containing the conventions to use.
   * @return A reference to this.
   * @stable ICU 2.0
   */
  UnicodeString& toUpper(const Locale& locale);

  /**
   * Convert the characters in this to lower case following the conventions of
   * the default locale.
   * @return A reference to this.
   * @stable ICU 2.0
   */
  UnicodeString& toLower(void);

  /**
   * Convert the characters in this to lower case following the conventions of
   * a specific locale.
   * @param locale The locale containing the conventions to use.
   * @return A reference to this.
   * @stable ICU 2.0
   */
  UnicodeString& toLower(const Locale& locale);

#if !UCONFIG_NO_BREAK_ITERATION

  /**
   * Titlecase this string, convenience function using the default locale.
   *
   * Casing is locale-dependent and context-sensitive.
   * Titlecasing uses a break iterator to find the first characters of words
   * that are to be titlecased. It titlecases those characters and lowercases
   * all others.
   *
   * The titlecase break iterator can be provided to customize for arbitrary
   * styles, using rules and dictionaries beyond the standard iterators.
   * It may be more efficient to always provide an iterator to avoid
   * opening and closing one for each string.
   * The standard titlecase iterator for the root locale implements the
   * algorithm of Unicode TR 21.
   *
   * This function uses only the setText(), first() and next() methods of the
   * provided break iterator.
   *
   * @param titleIter A break iterator to find the first characters of words
   *                  that are to be titlecased.
   *                  If none is provided (0), then a standard titlecase
   *                  break iterator is opened.
   *                  Otherwise the provided iterator is set to the string's text.
   * @return A reference to this.
   * @stable ICU 2.1
   */
  UnicodeString &toTitle(BreakIterator *titleIter);

  /**
   * Titlecase this string.
   *
   * Casing is locale-dependent and context-sensitive.
   * Titlecasing uses a break iterator to find the first characters of words
   * that are to be titlecased. It titlecases those characters and lowercases
   * all others.
   *
   * The titlecase break iterator can be provided to customize for arbitrary
   * styles, using rules and dictionaries beyond the standard iterators.
   * It may be more efficient to always provide an iterator to avoid
   * opening and closing one for each string.
   * The standard titlecase iterator for the root locale implements the
   * algorithm of Unicode TR 21.
   *
   * This function uses only the setText(), first() and next() methods of the
   * provided break iterator.
   *
   * @param titleIter A break iterator to find the first characters of words
   *                  that are to be titlecased.
   *                  If none is provided (0), then a standard titlecase
   *                  break iterator is opened.
   *                  Otherwise the provided iterator is set to the string's text.
   * @param locale    The locale to consider.
   * @return A reference to this.
   * @stable ICU 2.1
   */
  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);

  /**
   * Titlecase this string, with options.
   *
   * Casing is locale-dependent and context-sensitive.
   * Titlecasing uses a break iterator to find the first characters of words
   * that are to be titlecased. It titlecases those characters and lowercases
   * all others. (This can be modified with options.)
   *
   * The titlecase break iterator can be provided to customize for arbitrary
   * styles, using rules and dictionaries beyond the standard iterators.
   * It may be more efficient to always provide an iterator to avoid
   * opening and closing one for each string.
   * The standard titlecase iterator for the root locale implements the
   * algorithm of Unicode TR 21.
   *
   * This function uses only the setText(), first() and next() methods of the
   * provided break iterator.
   *
   * @param titleIter A break iterator to find the first characters of words
   *                  that are to be titlecased.
   *                  If none is provided (0), then a standard titlecase
   *                  break iterator is opened.
   *                  Otherwise the provided iterator is set to the string's text.
   * @param locale    The locale to consider.
   * @param options   Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
   *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
   *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
   * @return A reference to this.
   * @stable ICU 3.8
   */
  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);

#endif

  /**
   * Case-folds the characters in this string.
   *
   * Case-folding is locale-independent and not context-sensitive,
   * but there is an option for whether to include or exclude mappings for dotted I
   * and dotless i that are marked with 'T' in CaseFolding.txt.
   *
   * The result may be longer or shorter than the original.
   *
   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
   * @return A reference to this.
   * @stable ICU 2.0
   */
  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);

  //========================================
  // Access to the internal buffer
  //========================================

  /**
   * Get a read/write pointer to the internal buffer.
   * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
   * writable, and is still owned by the UnicodeString object.
   * Calls to getBuffer(minCapacity) must not be nested, and
   * must be matched with calls to releaseBuffer(newLength).
   * If the string buffer was read-only or shared,
   * then it will be reallocated and copied.
   *
   * An attempted nested call will return 0, and will not further modify the
   * state of the UnicodeString object.
   * It also returns 0 if the string is bogus.
   *
   * The actual capacity of the string buffer may be larger than minCapacity.
   * getCapacity() returns the actual capacity.
   * For many operations, the full capacity should be used to avoid reallocations.
   *
   * While the buffer is "open" between getBuffer(minCapacity)
   * and releaseBuffer(newLength), the following applies:
   * - The string length is set to 0.
   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
   * - You can read from and write to the returned buffer.
   * - The previous string contents will still be in the buffer;
   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
   *   If the length() was greater than minCapacity, then any contents after minCapacity
   *   may be lost.
   *   The buffer contents is not NUL-terminated by getBuffer().
   *   If length() < getCapacity() then you can terminate it by writing a NUL
   *   at index length().
   * - You must call releaseBuffer(newLength) before and in order to
   *   return to normal UnicodeString operation.
   *
   * @param minCapacity the minimum number of char16_ts that are to be available
   *        in the buffer, starting at the returned pointer;
   *        default to the current string capacity if minCapacity==-1
   * @return a writable pointer to the internal string buffer,
   *         or nullptr if an error occurs (nested calls, out of memory)
   *
   * @see releaseBuffer
   * @see getTerminatedBuffer()
   * @stable ICU 2.0
   */
  char16_t *getBuffer(int32_t minCapacity);

  /**
   * Release a read/write buffer on a UnicodeString object with an
   * "open" getBuffer(minCapacity).
   * This function must be called in a matched pair with getBuffer(minCapacity).
   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
   *
   * It will set the string length to newLength, at most to the current capacity.
   * If newLength==-1 then it will set the length according to the
   * first NUL in the buffer, or to the capacity if there is no NUL.
   *
   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
   *
   * @param newLength the new length of the UnicodeString object;
   *        defaults to the current capacity if newLength is greater than that;
   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
   *        the current capacity of the string
   *
   * @see getBuffer(int32_t minCapacity)
   * @stable ICU 2.0
   */
  void releaseBuffer(int32_t newLength=-1);

  /**
   * Get a read-only pointer to the internal buffer.
   * This can be called at any time on a valid UnicodeString.
   *
   * It returns 0 if the string is bogus, or
   * during an "open" getBuffer(minCapacity).
   *
   * It can be called as many times as desired.
   * The pointer that it returns will remain valid until the UnicodeString object is modified,
   * at which time the pointer is semantically invalidated and must not be used any more.
   *
   * The capacity of the buffer can be determined with getCapacity().
   * The part after length() may or may not be initialized and valid,
   * depending on the history of the UnicodeString object.
   *
   * The buffer contents is (probably) not NUL-terminated.
   * You can check if it is with
   * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
   * (See getTerminatedBuffer().)
   *
   * The buffer may reside in read-only memory. Its contents must not
   * be modified.
   *
   * @return a read-only pointer to the internal string buffer,
   *         or nullptr if the string is empty or bogus
   *
   * @see getBuffer(int32_t minCapacity)
   * @see getTerminatedBuffer()
   * @stable ICU 2.0
   */
  inline const char16_t *getBuffer() const;

  /**
   * Get a read-only pointer to the internal buffer,
   * making sure that it is NUL-terminated.
   * This can be called at any time on a valid UnicodeString.
   *
   * It returns 0 if the string is bogus, or
   * during an "open" getBuffer(minCapacity), or if the buffer cannot
   * be NUL-terminated (because memory allocation failed).
   *
   * It can be called as many times as desired.
   * The pointer that it returns will remain valid until the UnicodeString object is modified,
   * at which time the pointer is semantically invalidated and must not be used any more.
   *
   * The capacity of the buffer can be determined with getCapacity().
   * The part after length()+1 may or may not be initialized and valid,
   * depending on the history of the UnicodeString object.
   *
   * The buffer contents is guaranteed to be NUL-terminated.
   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
   * is written.
   * For this reason, this function is not const, unlike getBuffer().
   * Note that a UnicodeString may also contain NUL characters as part of its contents.
   *
   * The buffer may reside in read-only memory. Its contents must not
   * be modified.
   *
   * @return a read-only pointer to the internal string buffer,
   *         or 0 if the string is empty or bogus
   *
   * @see getBuffer(int32_t minCapacity)
   * @see getBuffer()
   * @stable ICU 2.2
   */
  const char16_t *getTerminatedBuffer();

  //========================================
  // Constructors
  //========================================

  /** Construct an empty UnicodeString.
   * @stable ICU 2.0
   */
  inline UnicodeString();

  /**
   * Construct a UnicodeString with capacity to hold `capacity` char16_ts
   * @param capacity the number of char16_ts this UnicodeString should hold
   * before a resize is necessary; if count is greater than 0 and count
   * code points c take up more space than capacity, then capacity is adjusted
   * accordingly.
   * @param c is used to initially fill the string
   * @param count specifies how many code points c are to be written in the
   *              string
   * @stable ICU 2.0
   */
  UnicodeString(int32_t capacity, UChar32 c, int32_t count);

  /**
   * Single char16_t (code unit) constructor.
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param ch the character to place in the UnicodeString
   * @stable ICU 2.0
   */
  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);

  /**
   * Single UChar32 (code point) constructor.
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param ch the character to place in the UnicodeString
   * @stable ICU 2.0
   */
  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);

  /**
   * char16_t* constructor.
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param text The characters to place in the UnicodeString.  `text`
   * must be NULL (U+0000) terminated.
   * @stable ICU 2.0
   */
  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);

#if !U_CHAR16_IS_TYPEDEF
  /**
   * uint16_t * constructor.
   * Delegates to UnicodeString(const char16_t *).
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param text NUL-terminated UTF-16 string
   * @stable ICU 59
   */
  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
      UnicodeString(ConstChar16Ptr(text)) {}
#endif

#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  /**
   * wchar_t * constructor.
   * (Only defined if U_SIZEOF_WCHAR_T==2.)
   * Delegates to UnicodeString(const char16_t *).
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param text NUL-terminated UTF-16 string
   * @stable ICU 59
   */
  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
      UnicodeString(ConstChar16Ptr(text)) {}
#endif

  /**
   * nullptr_t constructor.
   * Effectively the same as the default constructor, makes an empty string object.
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param text nullptr
   * @stable ICU 59
   */
  UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);

  /**
   * char16_t* constructor.
   * @param text The characters to place in the UnicodeString.
   * @param textLength The number of Unicode characters in `text`
   * to copy.
   * @stable ICU 2.0
   */
  UnicodeString(const char16_t *text,
        int32_t textLength);

#if !U_CHAR16_IS_TYPEDEF
  /**
   * uint16_t * constructor.
   * Delegates to UnicodeString(const char16_t *, int32_t).
   * @param text UTF-16 string
   * @param textLength string length
   * @stable ICU 59
   */
  UnicodeString(const uint16_t *text, int32_t textLength) :
      UnicodeString(ConstChar16Ptr(text), textLength) {}
#endif

#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  /**
   * wchar_t * constructor.
   * (Only defined if U_SIZEOF_WCHAR_T==2.)
   * Delegates to UnicodeString(const char16_t *, int32_t).
   * @param text NUL-terminated UTF-16 string
   * @param textLength string length
   * @stable ICU 59
   */
  UnicodeString(const wchar_t *text, int32_t textLength) :
      UnicodeString(ConstChar16Ptr(text), textLength) {}
#endif

  /**
   * nullptr_t constructor.
   * Effectively the same as the default constructor, makes an empty string object.
   * @param text nullptr
   * @param textLength ignored
   * @stable ICU 59
   */
  inline UnicodeString(const std::nullptr_t text, int32_t textLength);

  /**
   * Readonly-aliasing char16_t* constructor.
   * The text will be used for the UnicodeString object, but
   * it will not be released when the UnicodeString is destroyed.
   * This has copy-on-write semantics:
   * When the string is modified, then the buffer is first copied into
   * newly allocated memory.
   * The aliased buffer is never modified.
   *
   * In an assignment to another UnicodeString, when using the copy constructor
   * or the assignment operator, the text will be copied.
   * When using fastCopyFrom(), the text will be aliased again,
   * so that both strings then alias the same readonly-text.
   *
   * @param isTerminated specifies if `text` is `NUL`-terminated.
   *                     This must be true if `textLength==-1`.
   * @param text The characters to alias for the UnicodeString.
   * @param textLength The number of Unicode characters in `text` to alias.
   *                   If -1, then this constructor will determine the length
   *                   by calling `u_strlen()`.
   * @stable ICU 2.0
   */
  UnicodeString(UBool isTerminated,
                ConstChar16Ptr text,
                int32_t textLength);

  /**
   * Writable-aliasing char16_t* constructor.
   * The text will be used for the UnicodeString object, but
   * it will not be released when the UnicodeString is destroyed.
   * This has write-through semantics:
   * For as long as the capacity of the buffer is sufficient, write operations
   * will directly affect the buffer. When more capacity is necessary, then
   * a new buffer will be allocated and the contents copied as with regularly
   * constructed strings.
   * In an assignment to another UnicodeString, the buffer will be copied.
   * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
   * as the string buffer itself and will in this case not copy the contents.
   *
   * @param buffer The characters to alias for the UnicodeString.
   * @param buffLength The number of Unicode characters in `buffer` to alias.
   * @param buffCapacity The size of `buffer` in char16_ts.
   * @stable ICU 2.0
   */
  UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);

#if !U_CHAR16_IS_TYPEDEF
  /**
   * Writable-aliasing uint16_t * constructor.
   * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
   * @param buffer writable buffer of/for UTF-16 text
   * @param buffLength length of the current buffer contents
   * @param buffCapacity buffer capacity
   * @stable ICU 59
   */
  UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
      UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif

#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  /**
   * Writable-aliasing wchar_t * constructor.
   * (Only defined if U_SIZEOF_WCHAR_T==2.)
   * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
   * @param buffer writable buffer of/for UTF-16 text
   * @param buffLength length of the current buffer contents
   * @param buffCapacity buffer capacity
   * @stable ICU 59
   */
  UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
      UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif

  /**
   * Writable-aliasing nullptr_t constructor.
   * Effectively the same as the default constructor, makes an empty string object.
   * @param buffer nullptr
   * @param buffLength ignored
   * @param buffCapacity ignored
   * @stable ICU 59
   */
  inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);

#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

  /**
   * char* constructor.
   * Uses the default converter (and thus depends on the ICU conversion code)
   * unless U_CHARSET_IS_UTF8 is set to 1.
   *
   * For ASCII (really "invariant character") strings it is more efficient to use
   * the constructor that takes a US_INV (for its enum EInvariant).
   * For ASCII (invariant-character) string literals, see UNICODE_STRING and
   * UNICODE_STRING_SIMPLE.
   *
   * It is recommended to mark this constructor "explicit" by
   * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
   * on the compiler command line or similar.
   * @param codepageData an array of bytes, null-terminated,
   *                     in the platform's default codepage.
   * @stable ICU 2.0
   * @see UNICODE_STRING
   * @see UNICODE_STRING_SIMPLE
   */
  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);

  /**
   * char* constructor.
   * Uses the default converter (and thus depends on the ICU conversion code)
   * unless U_CHARSET_IS_UTF8 is set to 1.
   * @param codepageData an array of bytes in the platform's default codepage.
   * @param dataLength The number of bytes in `codepageData`.
   * @stable ICU 2.0
   */
  UnicodeString(const char *codepageData, int32_t dataLength);

#endif

#if !UCONFIG_NO_CONVERSION

  /**
   * char* constructor.
   * @param codepageData an array of bytes, null-terminated
   * @param codepage the encoding of `codepageData`.  The special
   * value 0 for `codepage` indicates that the text is in the
   * platform's default codepage.
   *
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * Recommendation: For invariant-character strings use the constructor
   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
   *
   * @stable ICU 2.0
   */
  UnicodeString(const char *codepageData, const char *codepage);

  /**
   * char* constructor.
   * @param codepageData an array of bytes.
   * @param dataLength The number of bytes in `codepageData`.
   * @param codepage the encoding of `codepageData`.  The special
   * value 0 for `codepage` indicates that the text is in the
   * platform's default codepage.
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * Recommendation: For invariant-character strings use the constructor
   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
   *
   * @stable ICU 2.0
   */
  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);

  /**
   * char * / UConverter constructor.
   * This constructor uses an existing UConverter object to
   * convert the codepage string to Unicode and construct a UnicodeString
   * from that.
   *
   * The converter is reset at first.
   * If the error code indicates a failure before this constructor is called,
   * or if an error occurs during conversion or construction,
   * then the string will be bogus.
   *
   * This function avoids the overhead of opening and closing a converter if
   * multiple strings are constructed.
   *
   * @param src input codepage string
   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
   * @param cnv converter object (ucnv_resetToUnicode() will be called),
   *        can be NULL for the default converter
   * @param errorCode normal ICU error code
   * @stable ICU 2.0
   */
  UnicodeString(
        const char *src, int32_t srcLength,
        UConverter *cnv,
        UErrorCode &errorCode);

#endif

  /**
   * Constructs a Unicode string from an invariant-character char * string.
   * About invariant characters see utypes.h.
   * This constructor has no runtime dependency on conversion code and is
   * therefore recommended over ones taking a charset name string
   * (where the empty string "" indicates invariant-character conversion).
   *
   * Use the macro US_INV as the third, signature-distinguishing parameter.
   *
   * For example:
   * \code
   *     void fn(const char *s) {
   *       UnicodeString ustr(s, -1, US_INV);
   *       // use ustr ...
   *     }
   * \endcode
   * @param src String using only invariant characters.
   * @param textLength Length of src, or -1 if NUL-terminated.
   * @param inv Signature-distinguishing parameter, use US_INV.
   *
   * @see US_INV
   * @stable ICU 3.2
   */
  UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);


  /**
   * Copy constructor.
   *
   * Starting with ICU 2.4, the assignment operator and the copy constructor
   * allocate a new buffer and copy the buffer contents even for readonly aliases.
   * By contrast, the fastCopyFrom() function implements the old,
   * more efficient but less safe behavior
   * of making this string also a readonly alias to the same buffer.
   *
   * If the source object has an "open" buffer from getBuffer(minCapacity),
   * then the copy is an empty string.
   *
   * @param that The UnicodeString object to copy.
   * @stable ICU 2.0
   * @see fastCopyFrom
   */
  UnicodeString(const UnicodeString& that);

  /**
   * Move constructor; might leave src in bogus state.
   * This string will have the same contents and state that the source string had.
   * @param src source string
   * @stable ICU 56
   */
  UnicodeString(UnicodeString &&src) U_NOEXCEPT;

  /**
   * 'Substring' constructor from tail of source string.
   * @param src The UnicodeString object to copy.
   * @param srcStart The offset into `src` at which to start copying.
   * @stable ICU 2.2
   */
  UnicodeString(const UnicodeString& src, int32_t srcStart);

  /**
   * 'Substring' constructor from subrange of source string.
   * @param src The UnicodeString object to copy.
   * @param srcStart The offset into `src` at which to start copying.
   * @param srcLength The number of characters from `src` to copy.
   * @stable ICU 2.2
   */
  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);

  /**
   * Clone this object, an instance of a subclass of Replaceable.
   * Clones can be used concurrently in multiple threads.
   * If a subclass does not implement clone(), or if an error occurs,
   * then NULL is returned.
   * The caller must delete the clone.
   *
   * @return a clone of this object
   *
   * @see Replaceable::clone
   * @see getDynamicClassID
   * @stable ICU 2.6
   */
  virtual UnicodeString *clone() const override;

  /** Destructor.
   * @stable ICU 2.0
   */
  virtual ~UnicodeString();

  /**
   * Create a UnicodeString from a UTF-8 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF8WithSub().
   *
   * @param utf8 UTF-8 input string.
   *             Note that a StringPiece can be implicitly constructed
   *             from a std::string or a NUL-terminated const char * string.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF8
   * @see toUTF8String
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF8(StringPiece utf8);

  /**
   * Create a UnicodeString from a UTF-32 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF32WithSub().
   *
   * @param utf32 UTF-32 input string. Must not be NULL.
   * @param length Length of the input string, or -1 if NUL-terminated.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF32
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);

  /* Miscellaneous operations */

  /**
   * Unescape a string of characters and return a string containing
   * the result.  The following escape sequences are recognized:
   *
   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   * \\Uhhhhhhhh   8 hex digits
   * \\xhh         1-2 hex digits
   * \\ooo         1-3 octal digits; o in [0-7]
   * \\cX          control-X; X is masked with 0x1F
   *
   * as well as the standard ANSI C escapes:
   *
   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   *
   * Anything else following a backslash is generically escaped.  For
   * example, "[a\\-z]" returns "[a-z]".
   *
   * If an escape sequence is ill-formed, this method returns an empty
   * string.  An example of an ill-formed sequence is "\\u" followed by
   * fewer than 4 hex digits.
   *
   * This function is similar to u_unescape() but not identical to it.
   * The latter takes a source char*, so it does escape recognition
   * and also invariant conversion.
   *
   * @return a string with backslash escapes interpreted, or an
   * empty string on error.
   * @see UnicodeString#unescapeAt()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UnicodeString unescape() const;

  /**
   * Unescape a single escape sequence and return the represented
   * character.  See unescape() for a listing of the recognized escape
   * sequences.  The character at offset-1 is assumed (without
   * checking) to be a backslash.  If the escape sequence is
   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
   * returned.
   *
   * @param offset an input output parameter.  On input, it is the
   * offset into this string where the escape sequence is located,
   * after the initial backslash.  On output, it is advanced after the
   * last character parsed.  On error, it is not advanced at all.
   * @return the character represented by the escape sequence at
   * offset, or U_SENTINEL=-1 on error.
   * @see UnicodeString#unescape()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UChar32 unescapeAt(int32_t &offset) const;

  /**
   * ICU "poor man's RTTI", returns a UClassID for this class.
   *
   * @stable ICU 2.2
   */
  static UClassID U_EXPORT2 getStaticClassID();

  /**
   * ICU "poor man's RTTI", returns a UClassID for the actual class.
   *
   * @stable ICU 2.2
   */
  virtual UClassID getDynamicClassID() const override;

  //========================================
  // Implementation methods
  //========================================

protected:
  /**
   * Implement Replaceable::getLength() (see jitterbug 1027).
   * @stable ICU 2.4
   */
  virtual int32_t getLength() const override;

  /**
   * The change in Replaceable to use virtual getCharAt() allows
   * UnicodeString::charAt() to be inline again (see jitterbug 709).
   * @stable ICU 2.4
   */
  virtual char16_t getCharAt(int32_t offset) const override;

  /**
   * The change in Replaceable to use virtual getChar32At() allows
   * UnicodeString::char32At() to be inline again (see jitterbug 709).
   * @stable ICU 2.4
   */
  virtual UChar32 getChar32At(int32_t offset) const override;

private:
  // For char* constructors. Could be made public.
  UnicodeString &setToUTF8(StringPiece utf8);
  // For extract(char*).
  // We could make a toUTF8(target, capacity, errorCode) public but not
  // this version: New API will be cleaner if we make callers create substrings
  // rather than having start+length on every method,
  // and it should take a UErrorCode&.
  int32_t
  toUTF8(int32_t start, int32_t len,
         char *target, int32_t capacity) const;

  /**
   * Internal string contents comparison, called by operator==.
   * Requires: this & text not bogus and have same lengths.
   */
  UBool doEquals(const UnicodeString &text, int32_t len) const;

  inline int8_t
  doCompare(int32_t start,
           int32_t length,
           const UnicodeString& srcText,
           int32_t srcStart,
           int32_t srcLength) const;

  int8_t doCompare(int32_t start,
           int32_t length,
           const char16_t *srcChars,
           int32_t srcStart,
           int32_t srcLength) const;

  inline int8_t
  doCompareCodePointOrder(int32_t start,
                          int32_t length,
                          const UnicodeString& srcText,
                          int32_t srcStart,
                          int32_t srcLength) const;

  int8_t doCompareCodePointOrder(int32_t start,
                                 int32_t length,
                                 const char16_t *srcChars,
                                 int32_t srcStart,
                                 int32_t srcLength) const;

  inline int8_t
  doCaseCompare(int32_t start,
                int32_t length,
                const UnicodeString &srcText,
                int32_t srcStart,
                int32_t srcLength,
                uint32_t options) const;

  int8_t
  doCaseCompare(int32_t start,
                int32_t length,
                const char16_t *srcChars,
                int32_t srcStart,
                int32_t srcLength,
                uint32_t options) const;

  int32_t doIndexOf(char16_t c,
            int32_t start,
            int32_t length) const;

  int32_t doIndexOf(UChar32 c,
                        int32_t start,
                        int32_t length) const;

  int32_t doLastIndexOf(char16_t c,
                int32_t start,
                int32_t length) const;

  int32_t doLastIndexOf(UChar32 c,
                            int32_t start,
                            int32_t length) const;

  void doExtract(int32_t start,
         int32_t length,
         char16_t *dst,
         int32_t dstStart) const;

  inline void doExtract(int32_t start,
         int32_t length,
         UnicodeString& target) const;

  inline char16_t doCharAt(int32_t offset)  const;

  UnicodeString& doReplace(int32_t start,
               int32_t length,
               const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength);

  UnicodeString& doReplace(int32_t start,
               int32_t length,
               const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength);

  UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);

  UnicodeString& doReverse(int32_t start,
               int32_t length);

  // calculate hash code
  int32_t doHashCode(void) const;

  // get pointer to start of array
  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  inline char16_t* getArrayStart(void);
  inline const char16_t* getArrayStart(void) const;

  inline UBool hasShortLength() const;
  inline int32_t getShortLength() const;

  // A UnicodeString object (not necessarily its current buffer)
  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  inline UBool isWritable() const;

  // Is the current buffer writable?
  inline UBool isBufferWritable() const;

  // None of the following does releaseArray().
  inline void setZeroLength();
  inline void setShortLength(int32_t len);
  inline void setLength(int32_t len);
  inline void setToEmpty();
  inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags

  // allocate the array; result may be the stack buffer
  // sets refCount to 1 if appropriate
  // sets fArray, fCapacity, and flags
  // sets length to 0
  // returns boolean for success or failure
  UBool allocate(int32_t capacity);

  // release the array if owned
  void releaseArray(void);

  // turn a bogus string into an empty one
  void unBogus();

  // implements assignment operator, copy constructor, and fastCopyFrom()
  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);

  // Copies just the fields without memory management.
  void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;

  // Pin start and limit to acceptable values.
  inline void pinIndex(int32_t& start) const;
  inline void pinIndices(int32_t& start,
                         int32_t& length) const;

#if !UCONFIG_NO_CONVERSION

  /* Internal extract() using UConverter. */
  int32_t doExtract(int32_t start, int32_t length,
                    char *dest, int32_t destCapacity,
                    UConverter *cnv,
                    UErrorCode &errorCode) const;

  /*
   * Real constructor for converting from codepage data.
   * It assumes that it is called with !fRefCounted.
   *
   * If `codepage==0`, then the default converter
   * is used for the platform encoding.
   * If `codepage` is an empty string (`""`),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   */
  void doCodepageCreate(const char *codepageData,
                        int32_t dataLength,
                        const char *codepage);

  /*
   * Worker function for creating a UnicodeString from
   * a codepage string using a UConverter.
   */
  void
  doCodepageCreate(const char *codepageData,
                   int32_t dataLength,
                   UConverter *converter,
                   UErrorCode &status);

#endif

  /*
   * This function is called when write access to the array
   * is necessary.
   *
   * We need to make a copy of the array if
   * the buffer is read-only, or
   * the buffer is refCounted (shared), and refCount>1, or
   * the buffer is too small.
   *
   * Return false if memory could not be allocated.
   */
  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
                            int32_t growCapacity = -1,
                            UBool doCopyArray = true,
                            int32_t **pBufferToDelete = 0,
                            UBool forceClone = false);

  /**
   * Common function for UnicodeString case mappings.
   * The stringCaseMapper has the same type UStringCaseMapper
   * as in ustr_imp.h for ustrcase_map().
   */
  UnicodeString &
  caseMap(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
          BreakIterator *iter,
#endif
          UStringCaseMapper *stringCaseMapper);

  // ref counting
  void addRef(void);
  int32_t removeRef(void);
  int32_t refCount(void) const;

  // constants
  enum {
    /**
     * Size of stack buffer for short strings.
     * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
     * @see UNISTR_OBJECT_SIZE
     */
    US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
    kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fLengthAndFlags
    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
    kRefCounted=4,      // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called
    kAllStorageFlags=0x1f,

    kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
    kLength1=1<<kLengthShift,
    kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
    kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength

    // combined values for convenience
    kShortString=kUsingStackBuffer,
    kLongString=kRefCounted,
    kReadonlyAlias=kBufferIsReadonly,
    kWritableAlias=0
  };

  friend class UnicodeStringAppendable;

  union StackBufferOrFields;        // forward declaration necessary before friend declaration
  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion

  /*
   * The following are all the class fields that are stored
   * in each UnicodeString object.
   * Note that UnicodeString has virtual functions,
   * therefore there is an implicit vtable pointer
   * as the first real field.
   * The fields should be aligned such that no padding is necessary.
   * On 32-bit machines, the size should be 32 bytes,
   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   *
   * We use a hack to achieve this.
   *
   * With at least some compilers, each of the following is forced to
   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   * rounded up with additional padding if the fields do not already fit that requirement:
   * - sizeof(class UnicodeString)
   * - offsetof(UnicodeString, fUnion)
   * - sizeof(fUnion)
   * - sizeof(fStackFields)
   *
   * We optimize for the longest possible internal buffer for short strings.
   * fUnion.fStackFields begins with 2 bytes for storage flags
   * and the length of relatively short strings,
   * followed by the buffer for short string contents.
   * There is no padding inside fStackFields.
   *
   * Heap-allocated and aliased strings use fUnion.fFields.
   * Both fStackFields and fFields must begin with the same fields for flags and short length,
   * that is, those must have the same memory offsets inside the object,
   * because the flags must be inspected in order to decide which half of fUnion is being used.
   * We assume that the compiler does not reorder the fields.
   *
   * (Padding at the end of fFields is ok:
   * As long as it is no larger than fStackFields, it is not wasted space.)
   *
   * For some of the history of the UnicodeString class fields layout, see
   * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
   * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
   * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
   */
  // (implicit) *vtable;
  union StackBufferOrFields {
    // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
    // Each struct of the union must begin with fLengthAndFlags.
    struct {
      int16_t fLengthAndFlags;          // bit fields: see constants above
      char16_t fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
    } fStackFields;
    struct {
      int16_t fLengthAndFlags;          // bit fields: see constants above
      int32_t fLength;    // number of characters in fArray if >127; else undefined
      int32_t fCapacity;  // capacity of fArray (in char16_ts)
      // array pointer last to minimize padding for machines with P128 data model
      // or pointer sizes that are not a power of 2
      char16_t   *fArray;    // the Unicode data
    } fFields;
  } fUnion;
};

/**
 * Create a new UnicodeString with the concatenation of two others.
 *
 * @param s1 The first string to be copied to the new one.
 * @param s2 The second string to be copied to the new one, after s1.
 * @return UnicodeString(s1).append(s2)
 * @stable ICU 2.8
 */
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);

//========================================
// Inline members
//========================================

//========================================
// Privates
//========================================

inline void
UnicodeString::pinIndex(int32_t& start) const
{
  // pin index
  if(start < 0) {
    start = 0;
  } else if(start > length()) {
    start = length();
  }
}

inline void
UnicodeString::pinIndices(int32_t& start,
                          int32_t& _length) const
{
  // pin indices
  int32_t len = length();
  if(start < 0) {
    start = 0;
  } else if(start > len) {
    start = len;
  }
  if(_length < 0) {
    _length = 0;
  } else if(_length > (len - start)) {
    _length = (len - start);
  }
}

inline char16_t*
UnicodeString::getArrayStart() {
  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
    fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
}

inline const char16_t*
UnicodeString::getArrayStart() const {
  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
    fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
}

//========================================
// Default constructor
//========================================

inline
UnicodeString::UnicodeString() {
  fUnion.fStackFields.fLengthAndFlags=kShortString;
}

inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
  fUnion.fStackFields.fLengthAndFlags=kShortString;
}

inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
  fUnion.fStackFields.fLengthAndFlags=kShortString;
}

inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
  fUnion.fStackFields.fLengthAndFlags=kShortString;
}

//========================================
// Read-only implementation methods
//========================================
inline UBool
UnicodeString::hasShortLength() const {
  return fUnion.fFields.fLengthAndFlags>=0;
}

inline int32_t
UnicodeString::getShortLength() const {
  // fLengthAndFlags must be non-negative -> short length >= 0
  // and arithmetic or logical shift does not matter.
  return fUnion.fFields.fLengthAndFlags>>kLengthShift;
}

inline int32_t
UnicodeString::length() const {
  return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
}

inline int32_t
UnicodeString::getCapacity() const {
  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
    US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
}

inline int32_t
UnicodeString::hashCode() const
{ return doHashCode(); }

inline UBool
UnicodeString::isBogus() const
{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }

inline UBool
UnicodeString::isWritable() const
{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }

inline UBool
UnicodeString::isBufferWritable() const
{
  return (UBool)(
      !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
      (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
}

inline const char16_t *
UnicodeString::getBuffer() const {
  if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
    return nullptr;
  } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
    return fUnion.fStackFields.fBuffer;
  } else {
    return fUnion.fFields.fArray;
  }
}

//========================================
// Read-only alias methods
//========================================
inline int8_t
UnicodeString::doCompare(int32_t start,
              int32_t thisLength,
              const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength) const
{
  if(srcText.isBogus()) {
    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  } else {
    srcText.pinIndices(srcStart, srcLength);
    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  }
}

inline bool
UnicodeString::operator== (const UnicodeString& text) const
{
  if(isBogus()) {
    return text.isBogus();
  } else {
    int32_t len = length(), textLength = text.length();
    return !text.isBogus() && len == textLength && doEquals(text, len);
  }
}

inline bool
UnicodeString::operator!= (const UnicodeString& text) const
{ return (! operator==(text)); }

inline UBool
UnicodeString::operator> (const UnicodeString& text) const
{ return doCompare(0, length(), text, 0, text.length()) == 1; }

inline UBool
UnicodeString::operator< (const UnicodeString& text) const
{ return doCompare(0, length(), text, 0, text.length()) == -1; }

inline UBool
UnicodeString::operator>= (const UnicodeString& text) const
{ return doCompare(0, length(), text, 0, text.length()) != -1; }

inline UBool
UnicodeString::operator<= (const UnicodeString& text) const
{ return doCompare(0, length(), text, 0, text.length()) != 1; }

inline int8_t
UnicodeString::compare(const UnicodeString& text) const
{ return doCompare(0, length(), text, 0, text.length()); }

inline int8_t
UnicodeString::compare(int32_t start,
               int32_t _length,
               const UnicodeString& srcText) const
{ return doCompare(start, _length, srcText, 0, srcText.length()); }

inline int8_t
UnicodeString::compare(ConstChar16Ptr srcChars,
               int32_t srcLength) const
{ return doCompare(0, length(), srcChars, 0, srcLength); }

inline int8_t
UnicodeString::compare(int32_t start,
               int32_t _length,
               const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength) const
{ return doCompare(start, _length, srcText, srcStart, srcLength); }

inline int8_t
UnicodeString::compare(int32_t start,
               int32_t _length,
               const char16_t *srcChars) const
{ return doCompare(start, _length, srcChars, 0, _length); }

inline int8_t
UnicodeString::compare(int32_t start,
               int32_t _length,
               const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength) const
{ return doCompare(start, _length, srcChars, srcStart, srcLength); }

inline int8_t
UnicodeString::compareBetween(int32_t start,
                  int32_t limit,
                  const UnicodeString& srcText,
                  int32_t srcStart,
                  int32_t srcLimit) const
{ return doCompare(start, limit - start,
           srcText, srcStart, srcLimit - srcStart); }

inline int8_t
UnicodeString::doCompareCodePointOrder(int32_t start,
                                       int32_t thisLength,
                                       const UnicodeString& srcText,
                                       int32_t srcStart,
                                       int32_t srcLength) const
{
  if(srcText.isBogus()) {
    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  } else {
    srcText.pinIndices(srcStart, srcLength);
    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  }
}

inline int8_t
UnicodeString::compareCodePointOrder(const UnicodeString& text) const
{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }

inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
                                     int32_t _length,
                                     const UnicodeString& srcText) const
{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }

inline int8_t
UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
                                     int32_t srcLength) const
{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }

inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
                                     int32_t _length,
                                     const UnicodeString& srcText,
                                     int32_t srcStart,
                                     int32_t srcLength) const
{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }

inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
                                     int32_t _length,
                                     const char16_t *srcChars) const
{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }

inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
                                     int32_t _length,
                                     const char16_t *srcChars,
                                     int32_t srcStart,
                                     int32_t srcLength) const
{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }

inline int8_t
UnicodeString::compareCodePointOrderBetween(int32_t start,
                                            int32_t limit,
                                            const UnicodeString& srcText,
                                            int32_t srcStart,
                                            int32_t srcLimit) const
{ return doCompareCodePointOrder(start, limit - start,
           srcText, srcStart, srcLimit - srcStart); }

inline int8_t
UnicodeString::doCaseCompare(int32_t start,
                             int32_t thisLength,
                             const UnicodeString &srcText,
                             int32_t srcStart,
                             int32_t srcLength,
                             uint32_t options) const
{
  if(srcText.isBogus()) {
    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  } else {
    srcText.pinIndices(srcStart, srcLength);
    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
  }
}

inline int8_t
UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  return doCaseCompare(0, length(), text, 0, text.length(), options);
}

inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UnicodeString &srcText,
                           uint32_t options) const {
  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
}

inline int8_t
UnicodeString::caseCompare(ConstChar16Ptr srcChars,
                           int32_t srcLength,
                           uint32_t options) const {
  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
}

inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UnicodeString &srcText,
                           int32_t srcStart,
                           int32_t srcLength,
                           uint32_t options) const {
  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
}

inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const char16_t *srcChars,
                           uint32_t options) const {
  return doCaseCompare(start, _length, srcChars, 0, _length, options);
}

inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const char16_t *srcChars,
                           int32_t srcStart,
                           int32_t srcLength,
                           uint32_t options) const {
  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
}

inline int8_t
UnicodeString::caseCompareBetween(int32_t start,
                                  int32_t limit,
                                  const UnicodeString &srcText,
                                  int32_t srcStart,
                                  int32_t srcLimit,
                                  uint32_t options) const {
  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
}

inline int32_t
UnicodeString::indexOf(const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t _length) const
{
  if(!srcText.isBogus()) {
    srcText.pinIndices(srcStart, srcLength);
    if(srcLength > 0) {
      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
    }
  }
  return -1;
}

inline int32_t
UnicodeString::indexOf(const UnicodeString& text) const
{ return indexOf(text, 0, text.length(), 0, length()); }

inline int32_t
UnicodeString::indexOf(const UnicodeString& text,
               int32_t start) const {
  pinIndex(start);
  return indexOf(text, 0, text.length(), start, length() - start);
}

inline int32_t
UnicodeString::indexOf(const UnicodeString& text,
               int32_t start,
               int32_t _length) const
{ return indexOf(text, 0, text.length(), start, _length); }

inline int32_t
UnicodeString::indexOf(const char16_t *srcChars,
               int32_t srcLength,
               int32_t start) const {
  pinIndex(start);
  return indexOf(srcChars, 0, srcLength, start, length() - start);
}

inline int32_t
UnicodeString::indexOf(ConstChar16Ptr srcChars,
               int32_t srcLength,
               int32_t start,
               int32_t _length) const
{ return indexOf(srcChars, 0, srcLength, start, _length); }

inline int32_t
UnicodeString::indexOf(char16_t c,
               int32_t start,
               int32_t _length) const
{ return doIndexOf(c, start, _length); }

inline int32_t
UnicodeString::indexOf(UChar32 c,
               int32_t start,
               int32_t _length) const
{ return doIndexOf(c, start, _length); }

inline int32_t
UnicodeString::indexOf(char16_t c) const
{ return doIndexOf(c, 0, length()); }

inline int32_t
UnicodeString::indexOf(UChar32 c) const
{ return indexOf(c, 0, length()); }

inline int32_t
UnicodeString::indexOf(char16_t c,
               int32_t start) const {
  pinIndex(start);
  return doIndexOf(c, start, length() - start);
}

inline int32_t
UnicodeString::indexOf(UChar32 c,
               int32_t start) const {
  pinIndex(start);
  return indexOf(c, start, length() - start);
}

inline int32_t
UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
               int32_t srcLength,
               int32_t start,
               int32_t _length) const
{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }

inline int32_t
UnicodeString::lastIndexOf(const char16_t *srcChars,
               int32_t srcLength,
               int32_t start) const {
  pinIndex(start);
  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
}

inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength,
               int32_t start,
               int32_t _length) const
{
  if(!srcText.isBogus()) {
    srcText.pinIndices(srcStart, srcLength);
    if(srcLength > 0) {
      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
    }
  }
  return -1;
}

inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text,
               int32_t start,
               int32_t _length) const
{ return lastIndexOf(text, 0, text.length(), start, _length); }

inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text,
               int32_t start) const {
  pinIndex(start);
  return lastIndexOf(text, 0, text.length(), start, length() - start);
}

inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text) const
{ return lastIndexOf(text, 0, text.length(), 0, length()); }

inline int32_t
UnicodeString::lastIndexOf(char16_t c,
               int32_t start,
               int32_t _length) const
{ return doLastIndexOf(c, start, _length); }

inline int32_t
UnicodeString::lastIndexOf(UChar32 c,
               int32_t start,
               int32_t _length) const {
  return doLastIndexOf(c, start, _length);
}

inline int32_t
UnicodeString::lastIndexOf(char16_t c) const
{ return doLastIndexOf(c, 0, length()); }

inline int32_t
UnicodeString::lastIndexOf(UChar32 c) const {
  return lastIndexOf(c, 0, length());
}

inline int32_t
UnicodeString::lastIndexOf(char16_t c,
               int32_t start) const {
  pinIndex(start);
  return doLastIndexOf(c, start, length() - start);
}

inline int32_t
UnicodeString::lastIndexOf(UChar32 c,
               int32_t start) const {
  pinIndex(start);
  return lastIndexOf(c, start, length() - start);
}

inline UBool
UnicodeString::startsWith(const UnicodeString& text) const
{ return compare(0, text.length(), text, 0, text.length()) == 0; }

inline UBool
UnicodeString::startsWith(const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength) const
{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }

inline UBool
UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
  if(srcLength < 0) {
    srcLength = u_strlen(toUCharPtr(srcChars));
  }
  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
}

inline UBool
UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
  if(srcLength < 0) {
    srcLength = u_strlen(toUCharPtr(srcChars));
  }
  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
}

inline UBool
UnicodeString::endsWith(const UnicodeString& text) const
{ return doCompare(length() - text.length(), text.length(),
           text, 0, text.length()) == 0; }

inline UBool
UnicodeString::endsWith(const UnicodeString& srcText,
            int32_t srcStart,
            int32_t srcLength) const {
  srcText.pinIndices(srcStart, srcLength);
  return doCompare(length() - srcLength, srcLength,
                   srcText, srcStart, srcLength) == 0;
}

inline UBool
UnicodeString::endsWith(ConstChar16Ptr srcChars,
            int32_t srcLength) const {
  if(srcLength < 0) {
    srcLength = u_strlen(toUCharPtr(srcChars));
  }
  return doCompare(length() - srcLength, srcLength,
                   srcChars, 0, srcLength) == 0;
}

inline UBool
UnicodeString::endsWith(const char16_t *srcChars,
            int32_t srcStart,
            int32_t srcLength) const {
  if(srcLength < 0) {
    srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
  }
  return doCompare(length() - srcLength, srcLength,
                   srcChars, srcStart, srcLength) == 0;
}

//========================================
// replace
//========================================
inline UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               const UnicodeString& srcText)
{ return doReplace(start, _length, srcText, 0, srcText.length()); }

inline UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength)
{ return doReplace(start, _length, srcText, srcStart, srcLength); }

inline UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               ConstChar16Ptr srcChars,
               int32_t srcLength)
{ return doReplace(start, _length, srcChars, 0, srcLength); }

inline UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               const char16_t *srcChars,
               int32_t srcStart,
               int32_t srcLength)
{ return doReplace(start, _length, srcChars, srcStart, srcLength); }

inline UnicodeString&
UnicodeString::replace(int32_t start,
               int32_t _length,
               char16_t srcChar)
{ return doReplace(start, _length, &srcChar, 0, 1); }

inline UnicodeString&
UnicodeString::replaceBetween(int32_t start,
                  int32_t limit,
                  const UnicodeString& srcText)
{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }

inline UnicodeString&
UnicodeString::replaceBetween(int32_t start,
                  int32_t limit,
                  const UnicodeString& srcText,
                  int32_t srcStart,
                  int32_t srcLimit)
{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }

inline UnicodeString&
UnicodeString::findAndReplace(const UnicodeString& oldText,
                  const UnicodeString& newText)
{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
            newText, 0, newText.length()); }

inline UnicodeString&
UnicodeString::findAndReplace(int32_t start,
                  int32_t _length,
                  const UnicodeString& oldText,
                  const UnicodeString& newText)
{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
            newText, 0, newText.length()); }

// ============================
// extract
// ============================
inline void
UnicodeString::doExtract(int32_t start,
             int32_t _length,
             UnicodeString& target) const
{ target.replace(0, target.length(), *this, start, _length); }

inline void
UnicodeString::extract(int32_t start,
               int32_t _length,
               Char16Ptr target,
               int32_t targetStart) const
{ doExtract(start, _length, target, targetStart); }

inline void
UnicodeString::extract(int32_t start,
               int32_t _length,
               UnicodeString& target) const
{ doExtract(start, _length, target); }

#if !UCONFIG_NO_CONVERSION

inline int32_t
UnicodeString::extract(int32_t start,
               int32_t _length,
               char *dst,
               const char *codepage) const

{
  // This dstSize value will be checked explicitly
  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
}

#endif

inline void
UnicodeString::extractBetween(int32_t start,
                  int32_t limit,
                  char16_t *dst,
                  int32_t dstStart) const {
  pinIndex(start);
  pinIndex(limit);
  doExtract(start, limit - start, dst, dstStart);
}

inline UnicodeString
UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
    return tempSubString(start, limit - start);
}

inline char16_t
UnicodeString::doCharAt(int32_t offset) const
{
  if((uint32_t)offset < (uint32_t)length()) {
    return getArrayStart()[offset];
  } else {
    return kInvalidUChar;
  }
}

inline char16_t
UnicodeString::charAt(int32_t offset) const
{ return doCharAt(offset); }

inline char16_t
UnicodeString::operator[] (int32_t offset) const
{ return doCharAt(offset); }

inline UBool
UnicodeString::isEmpty() const {
  // Arithmetic or logical right shift does not matter: only testing for 0.
  return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
}

//========================================
// Write implementation methods
//========================================
inline void
UnicodeString::setZeroLength() {
  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
}

inline void
UnicodeString::setShortLength(int32_t len) {
  // requires 0 <= len <= kMaxShortLength
  fUnion.fFields.fLengthAndFlags =
    (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
}

inline void
UnicodeString::setLength(int32_t len) {
  if(len <= kMaxShortLength) {
    setShortLength(len);
  } else {
    fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
    fUnion.fFields.fLength = len;
  }
}

inline void
UnicodeString::setToEmpty() {
  fUnion.fFields.fLengthAndFlags = kShortString;
}

inline void
UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
  setLength(len);
  fUnion.fFields.fArray = array;
  fUnion.fFields.fCapacity = capacity;
}

inline UnicodeString&
UnicodeString::operator= (char16_t ch)
{ return doReplace(0, length(), &ch, 0, 1); }

inline UnicodeString&
UnicodeString::operator= (UChar32 ch)
{ return replace(0, length(), ch); }

inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText,
             int32_t srcStart,
             int32_t srcLength)
{
  unBogus();
  return doReplace(0, length(), srcText, srcStart, srcLength);
}

inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText,
             int32_t srcStart)
{
  unBogus();
  srcText.pinIndex(srcStart);
  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
}

inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText)
{
  return copyFrom(srcText);
}

inline UnicodeString&
UnicodeString::setTo(const char16_t *srcChars,
             int32_t srcLength)
{
  unBogus();
  return doReplace(0, length(), srcChars, 0, srcLength);
}

inline UnicodeString&
UnicodeString::setTo(char16_t srcChar)
{
  unBogus();
  return doReplace(0, length(), &srcChar, 0, 1);
}

inline UnicodeString&
UnicodeString::setTo(UChar32 srcChar)
{
  unBogus();
  return replace(0, length(), srcChar);
}

inline UnicodeString&
UnicodeString::append(const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength)
{ return doAppend(srcText, srcStart, srcLength); }

inline UnicodeString&
UnicodeString::append(const UnicodeString& srcText)
{ return doAppend(srcText, 0, srcText.length()); }

inline UnicodeString&
UnicodeString::append(const char16_t *srcChars,
              int32_t srcStart,
              int32_t srcLength)
{ return doAppend(srcChars, srcStart, srcLength); }

inline UnicodeString&
UnicodeString::append(ConstChar16Ptr srcChars,
              int32_t srcLength)
{ return doAppend(srcChars, 0, srcLength); }

inline UnicodeString&
UnicodeString::append(char16_t srcChar)
{ return doAppend(&srcChar, 0, 1); }

inline UnicodeString&
UnicodeString::operator+= (char16_t ch)
{ return doAppend(&ch, 0, 1); }

inline UnicodeString&
UnicodeString::operator+= (UChar32 ch) {
  return append(ch);
}

inline UnicodeString&
UnicodeString::operator+= (const UnicodeString& srcText)
{ return doAppend(srcText, 0, srcText.length()); }

inline UnicodeString&
UnicodeString::insert(int32_t start,
              const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength)
{ return doReplace(start, 0, srcText, srcStart, srcLength); }

inline UnicodeString&
UnicodeString::insert(int32_t start,
              const UnicodeString& srcText)
{ return doReplace(start, 0, srcText, 0, srcText.length()); }

inline UnicodeString&
UnicodeString::insert(int32_t start,
              const char16_t *srcChars,
              int32_t srcStart,
              int32_t srcLength)
{ return doReplace(start, 0, srcChars, srcStart, srcLength); }

inline UnicodeString&
UnicodeString::insert(int32_t start,
              ConstChar16Ptr srcChars,
              int32_t srcLength)
{ return doReplace(start, 0, srcChars, 0, srcLength); }

inline UnicodeString&
UnicodeString::insert(int32_t start,
              char16_t srcChar)
{ return doReplace(start, 0, &srcChar, 0, 1); }

inline UnicodeString&
UnicodeString::insert(int32_t start,
              UChar32 srcChar)
{ return replace(start, 0, srcChar); }


inline UnicodeString&
UnicodeString::remove()
{
  // remove() of a bogus string makes the string empty and non-bogus
  if(isBogus()) {
    setToEmpty();
  } else {
    setZeroLength();
  }
  return *this;
}

inline UnicodeString&
UnicodeString::remove(int32_t start,
             int32_t _length)
{
    if(start <= 0 && _length == INT32_MAX) {
        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
        return remove();
    }
    return doReplace(start, _length, NULL, 0, 0);
}

inline UnicodeString&
UnicodeString::removeBetween(int32_t start,
                int32_t limit)
{ return doReplace(start, limit - start, NULL, 0, 0); }

inline UnicodeString &
UnicodeString::retainBetween(int32_t start, int32_t limit) {
  truncate(limit);
  return doReplace(0, start, NULL, 0, 0);
}

inline UBool
UnicodeString::truncate(int32_t targetLength)
{
  if(isBogus() && targetLength == 0) {
    // truncate(0) of a bogus string makes the string empty and non-bogus
    unBogus();
    return false;
  } else if((uint32_t)targetLength < (uint32_t)length()) {
    setLength(targetLength);
    return true;
  } else {
    return false;
  }
}

inline UnicodeString&
UnicodeString::reverse()
{ return doReverse(0, length()); }

inline UnicodeString&
UnicodeString::reverse(int32_t start,
               int32_t _length)
{ return doReverse(start, _length); }

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */

#endif
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             