Provides methods for querying the properties of Unicode characters. More...

#include <rw/i18n/RWUCharTraits.h>

Public Types
enum	BidirectionalCategory { NoBidirectionalCategory, BeginBidirectionalCategory, LeftToRight, RightToLeft, EuropeanNumber, EuropeanNumberSeparator, EuropeanNumberTerminator, ArabicNumber, CommonNumberSeparator, BlockSeparator, SegmentSeparator, WhiteSpaceNeutral, OtherNeutral, LeftToRightEmbedding, LeftToRightOverride, RightToLeftArabic, RightToLeftEmbedding, RightToLeftOverride, PopDirectionalFormat, DirNonSpacingMark, BoundaryNeutral, EndBidirectionalCategory }

enum	Block { NoBlock, BeginBlock, BasicLatinBlock, Latin1SupplementBlock, LatinExtendedABlock, LatinExtendedBBlock, IpaExtensionsBlock, SpacingModifierLettersBlock, CombiningDiacriticalMarksBlock, GreekAndCopticBlock, CyrillicBlock, CyrillicSupplementalblock, ArmenianBlock, HebrewBlock, ArabicBlock, SyriacBlock, ThaanaBlock, DevanagariBlock, BengaliBlock, GurmukhiBlock, GujaratiBlock, OriyaBlock, TamilBlock, TeluguBlock, KannadaBlock, MalayalamBlock, SinhalaBlock, ThaiBlock, LaoBlock, TibetanBlock, MyanmarBlock, GeorgianBlock, HangulJamoBlock, EthiopicBlock, CherokeeBlock, UnifiedCanadianAboriginalSyllabicsBlock, OghamBlock, RunicBlock, TagalogBlock, HanunooBlock, BuhidBlock, TagbanwaBlock, KhmerBlock, MongolianBlock, LatinExtendedAdditionalBlock, GreekExtendedBlock, GeneralPunctuationBlock, SuperscriptsAndSubscriptsBlock, CurrencySymbolsBlock, CombiningDiacriticalMarksForSymbolsBlock, LetterlikeSymbolsBlock, NumberFormsBlock, ArrowsBlock, MathematicalOperatorsBlock, MiscellaneousTechnicalBlock, ControlPicturesBlock, OpticalCharacterRecognitionBlock, EnclosedAlphanumericsBlock, BoxDrawingBlock, BlockElementsBlock, GeometricShapesBlock, MiscellaneousSymbolsBlock, DingbatsBlock, MiscellaneousMathematicalSymbolsABlock, SupplementalArrowsABlock, BraillePatternsBlock, SupplementalArrowsBBlock, MiscellaneousMathematicalSymbolsBBlock, SupplementalMathematicalOperatorsBlock, CjkRadicalsSupplementBlock, KangxiRadicalsBlock, IdeographicDescriptionCharactersBlock, CjkSymbolsAndPunctuationBlock, HiraganaBlock, KatakanaBlock, BopomofoBlock, HangulCompatibilityJamoBlock, KanbunBlock, BopomofoExtendedBlock, KatakanaPhoneticExtensionsBlock, EnclosedCjkLettersAndMonthsBlock, CjkCompatibilityBlock, CjkUnifiedIdeographsExtensionABlock, CjkUnifiedIdeographsBlock, YiSyllablesBlock, YiRadicalsBlock, HangulSyllablesBlock, HighSurrogatesBlock, HighPrivateUseSurrogatesBlock, LowSurrogatesBlock, PrivateUseAreaBlock, CjkCompatibilityIdeographsBlock, AlphabeticPresentationFormsBlock, 
ArabicPresentationFormsABlock, VariationSelectorsBlock, CombiningHalfMarksBlock, CjkCompatibilityFormsBlock, SmallFormVariantsBlock, ArabicPresentationFormsBBlock, HalfwidthAndFullwidthFormsBlock, SpecialsBlock, OldItalicBlock, GothicBlock, DeseretBlock, ByzantineMusicalSymbolsBlock, MusicalSymbolsBlock, MathematicalAlphanumericSymbolsBlock, CjkUnifiedIdeographsExtensionBBlock, CjkCompatibilityIdeographsSupplementBlock, TagsBlock, PrivateUseBlock, SupplementaryPrivateUseAreaABlock, SupplementaryPrivateUseAreaBBlock, InvalidBlock, EndBlock }

enum	CombiningClass { BeginCombiningClass, BaseEquivalent, Spacing, Nonspacing, Split, Enclosing, Reordrant, TibetanSubjoined, OverlayInterior, Nuktas, KanaVoicingMarks, Viramas, StartOfFixedPositionClasses, HebrewPointSheva, HebrewPointHatafSegol, HebrewPointHatafPatah, HebrewPointHatafQamats, HebrewPointHiriq, HebrewPointTsere, HebrewPointSegol, HebrewPointPatah, HebrewPointQamats, HebrewPointHolam, HebrewPointQubuts, HebrewPointDagishOrMapiq, HebrewPointMeteg, HebrewPointRafe, HebrewPointShinDot, HebrewPointSinDot, HebrewPointJudeoSpanishVarika, ArabicFathatan, ArabicDammatan, ArabicKasratan, ArabicFatha, ArabicDamma, ArabicKasra, ArabicShadda, ArabicSukun, ArabicLetterSuperscriptAlef, SyriacLetterSuperscriptAlaph, TeluguLengthMark, TeluguAiLengthMark, ThaiCharacterSaraU, ThaiCharacterSaraUu, ThaiCharacterMaiEk, ThaiCharacterMaiTho, ThaiCharacterMaiTri, ThaiCharacterMaiChattawa, LaoVowelSignU, LaoVowelSignUu, LaoToneMaiEk, LaoToneMaiTho, LaoToneMaiTi, LaoToneMaiCatawa, TibetanVowelSignAa, TibetanVowelSignI, TibetanVowelSignE, TibetanVowelSignEe, TibetanVowelSignO, TibetanVowelSignOo, TibetanVowelSignReversedI, TibetanVowelSignU, EndOfFixedPositionClasses, BelowLeftAttached, BelowAttached, BelowRightAttached, LeftAttached, RightAttached, AboveLeftAttached, AboveAttached, AboveRightAttached, BelowLeft, Below, BelowRight, Left, Right, AboveLeft, Above, AboveRight, DoubleBelow, DoubleAbove, IotaSubscript, EndCombiningClass, NoCombiningClass }

enum	EastAsianWidth { NeutralWidth, AmbiguousWidth, HalfWidth, FullWidth, NarrowWidth, WideWidth }

enum	GeneralCategory { BeginGeneralCategory, Unassigned, UppercaseLetter, LowercaseLetter, TitlecaseLetter, ModifierLetter, OtherLetter, NonSpacingMark, EnclosingMark, CombiningSpacingMark, DecimalDigitNumber, LetterNumber, OtherNumber, SpaceSeparator, LineSeparator, ParagraphSeparator, ControlChar, FormatChar, PrivateUseChar, Surrogate, DashPunctuation, StartPunctuation, EndPunctuation, ConnectorPunctuation, OtherPunctuation, MathSymbol, CurrencySymbol, ModifierSymbol, OtherSymbol, InitialPunctuation, FinalPunctuation, GeneralOtherTypes, EndGeneralCategory }

enum	Script { BeginScript, Latin, Greek, Cyrillic, Armenian, Hebrew, Arabic, Syriac, Thaana, Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar, Georgian, Hangul, Ethiopic, Cherokee, CanadianAboriginal, Ogham, Runic, Khmer, Mongolian, Hiragana, Katakana, Bopomofo, Han, Yi, OldItalic, Gothic, Deseret, Inherited, NoScript, InvalidScript, EndScript }

Static Public Member Functions
static BidirectionalCategory	getBidirectionalCategory (RWUChar32 cp)

static Block	getBlock (RWUChar32 cp)

static RWUChar32	getChar32 (const char *name, bool isDeprecatedName=false)

static CombiningClass	getCombiningClass (RWUChar32 cp)

static int32_t	getDecimalValue (RWUChar32 cp)

static RWUChar32	getDigit (int32_t value, int8_t radix)

static EastAsianWidth	getEastAsianWidth (RWUChar32 cp)

static GeneralCategory	getGeneralCategory (RWUChar32 cp)

static RWUChar32	getMirror (RWUChar32 cp)

static RWCString	getName (RWUChar32 cp, bool getDeprecatedName=false)

static int32_t	getNumericValue (RWUChar32 cp, int8_t radix)

static Script	getScript (RWUChar32 cp)

static const RWUChar32 *	getWhitespace ()

static bool	isCharacter (RWUChar32 cp)

static bool	isControl (RWUChar32 cp)

static bool	isDecimalDigit (RWUChar32 cp)

static bool	isDefined (RWUChar32 cp)

static bool	isDigit (RWUChar32 cp)

static bool	isError (RWUChar32 cp)

static bool	isHighSurrogate (RWUChar16 cu)

static bool	isLetter (RWUChar32 cp)

static bool	isLower (RWUChar32 cp)

static bool	isLowSurrogate (RWUChar16 cu)

static bool	isMirrored (RWUChar32 cp)

static bool	isNumeric (RWUChar32 cp)

static bool	isPunctuation (RWUChar32 cp)

static bool	isSingle (RWUChar16 cu)

static bool	isSpace (RWUChar32 cp)

static bool	isSurrogate (RWUChar16 cu)

static bool	isTitle (RWUChar32 cp)

static bool	isUpper (RWUChar32 cp)

static bool	isWhitespace (RWUChar32 cp)

static bool	requiresSurrogatePair (RWUChar32 cp)

static RWUChar32	toLower (RWUChar32 cp)

static RWUChar32	toTitle (RWUChar32 cp)

static RWUChar32	toUpper (RWUChar32 cp)

Detailed Description

RWUCharTraits provides methods for querying the properties of Unicode characters.

The Unicode Standard defines a comprehensive set of properties for each code point in the Unicode character set. The set of properties and the values of those properties are specified by the Unicode Character Database:

http://www.unicode.org/onlinedat/online.html

that is published as part of the Unicode Standard:

http://www.unicode.org/standard/standard.html

Unicode character properties may be either normative or informative, as defined in Chapter 3, "Conformance", of the Unicode Standard:

Normative properties are required for conformance with the Unicode Standard. Implementations that claim conformance to the Unicode Standard and that make use of a particular normative property must follow the specifications of the standard for that property to be conformant.
Informative properties are strongly recommended, but a conformant implementation is free to use or change such values as it may require, while still remaining conformant to the standard.

RWUCharTraits provides access to both normative and informative properties of Unicode characters.

Member Enumeration Documentation

enum RWUCharTraits::BidirectionalCategory

An enumeration used to describe the linguistic direction of a Unicode character code point. The values in this enumeration correspond to the bidirectional category property codes that appear in the file UnicodeData.txt of the Unicode Character Database, as described in the documentation that accompanies that database.

Enumerator
NoBidirectionalCategory
BeginBidirectionalCategory
LeftToRight	L
RightToLeft	R
EuropeanNumber	EN
EuropeanNumberSeparator	ES
EuropeanNumberTerminator	ET
ArabicNumber	AN
CommonNumberSeparator	CS
BlockSeparator	B
SegmentSeparator	S
WhiteSpaceNeutral	WS
OtherNeutral	ON
LeftToRightEmbedding	LRE
LeftToRightOverride	LRO
RightToLeftArabic	AL
RightToLeftEmbedding	RLE
RightToLeftOverride	RLO
PopDirectionalFormat	PDF
DirNonSpacingMark	NSM
BoundaryNeutral	BN
EndBidirectionalCategory

enum RWUCharTraits::Block

Enumerator
NoBlock
BeginBlock
BasicLatinBlock
Latin1SupplementBlock
LatinExtendedABlock
LatinExtendedBBlock
IpaExtensionsBlock
SpacingModifierLettersBlock
CombiningDiacriticalMarksBlock
GreekAndCopticBlock
CyrillicBlock
CyrillicSupplementalblock	Defined under Unicode 3.2
ArmenianBlock
HebrewBlock
ArabicBlock
SyriacBlock
ThaanaBlock
DevanagariBlock
BengaliBlock
GurmukhiBlock
GujaratiBlock
OriyaBlock
TamilBlock
TeluguBlock
KannadaBlock
MalayalamBlock
SinhalaBlock
ThaiBlock
LaoBlock
TibetanBlock
MyanmarBlock
GeorgianBlock
HangulJamoBlock
EthiopicBlock
CherokeeBlock
UnifiedCanadianAboriginalSyllabicsBlock
OghamBlock
RunicBlock
TagalogBlock
HanunooBlock
BuhidBlock
TagbanwaBlock
KhmerBlock
MongolianBlock
LatinExtendedAdditionalBlock
GreekExtendedBlock
GeneralPunctuationBlock
SuperscriptsAndSubscriptsBlock
CurrencySymbolsBlock
CombiningDiacriticalMarksForSymbolsBlock
LetterlikeSymbolsBlock
NumberFormsBlock
ArrowsBlock
MathematicalOperatorsBlock
MiscellaneousTechnicalBlock
ControlPicturesBlock
OpticalCharacterRecognitionBlock
EnclosedAlphanumericsBlock
BoxDrawingBlock
BlockElementsBlock
GeometricShapesBlock
MiscellaneousSymbolsBlock
DingbatsBlock
MiscellaneousMathematicalSymbolsABlock
SupplementalArrowsABlock
BraillePatternsBlock
SupplementalArrowsBBlock
MiscellaneousMathematicalSymbolsBBlock
SupplementalMathematicalOperatorsBlock
CjkRadicalsSupplementBlock
KangxiRadicalsBlock
IdeographicDescriptionCharactersBlock
CjkSymbolsAndPunctuationBlock
HiraganaBlock
KatakanaBlock
BopomofoBlock
HangulCompatibilityJamoBlock
KanbunBlock
BopomofoExtendedBlock
KatakanaPhoneticExtensionsBlock
EnclosedCjkLettersAndMonthsBlock
CjkCompatibilityBlock
CjkUnifiedIdeographsExtensionABlock
CjkUnifiedIdeographsBlock
YiSyllablesBlock
YiRadicalsBlock
HangulSyllablesBlock
HighSurrogatesBlock
HighPrivateUseSurrogatesBlock
LowSurrogatesBlock
PrivateUseAreaBlock
CjkCompatibilityIdeographsBlock
AlphabeticPresentationFormsBlock
ArabicPresentationFormsABlock
VariationSelectorsBlock
CombiningHalfMarksBlock
CjkCompatibilityFormsBlock
SmallFormVariantsBlock
ArabicPresentationFormsBBlock
HalfwidthAndFullwidthFormsBlock
SpecialsBlock
OldItalicBlock
GothicBlock
DeseretBlock
ByzantineMusicalSymbolsBlock
MusicalSymbolsBlock
MathematicalAlphanumericSymbolsBlock
CjkUnifiedIdeographsExtensionBBlock
CjkCompatibilityIdeographsSupplementBlock
TagsBlock
PrivateUseBlock
SupplementaryPrivateUseAreaABlock
SupplementaryPrivateUseAreaBBlock
InvalidBlock
EndBlock

enum RWUCharTraits::CombiningClass

Enumerator
BeginCombiningClass
BaseEquivalent
Spacing
Nonspacing
Split
Enclosing
Reordrant
TibetanSubjoined
OverlayInterior
Nuktas
KanaVoicingMarks
Viramas
StartOfFixedPositionClasses
HebrewPointSheva
HebrewPointHatafSegol
HebrewPointHatafPatah
HebrewPointHatafQamats
HebrewPointHiriq
HebrewPointTsere
HebrewPointSegol
HebrewPointPatah
HebrewPointQamats
HebrewPointHolam
HebrewPointQubuts
HebrewPointDagishOrMapiq
HebrewPointMeteg
HebrewPointRafe
HebrewPointShinDot
HebrewPointSinDot
HebrewPointJudeoSpanishVarika
ArabicFathatan
ArabicDammatan
ArabicKasratan
ArabicFatha
ArabicDamma
ArabicKasra
ArabicShadda
ArabicSukun
ArabicLetterSuperscriptAlef
SyriacLetterSuperscriptAlaph
TeluguLengthMark
TeluguAiLengthMark
ThaiCharacterSaraU
ThaiCharacterSaraUu
ThaiCharacterMaiEk
ThaiCharacterMaiTho
ThaiCharacterMaiTri
ThaiCharacterMaiChattawa
LaoVowelSignU
LaoVowelSignUu
LaoToneMaiEk
LaoToneMaiTho
LaoToneMaiTi
LaoToneMaiCatawa
TibetanVowelSignAa
TibetanVowelSignI
TibetanVowelSignE
TibetanVowelSignEe
TibetanVowelSignO
TibetanVowelSignOo
TibetanVowelSignReversedI
TibetanVowelSignU
EndOfFixedPositionClasses
BelowLeftAttached
BelowAttached
BelowRightAttached
LeftAttached
RightAttached
AboveLeftAttached
AboveAttached
AboveRightAttached
BelowLeft
Below
BelowRight
Left
Right
AboveLeft
Above
AboveRight
DoubleBelow
DoubleAbove
IotaSubscript
EndCombiningClass
NoCombiningClass

enum RWUCharTraits::EastAsianWidth

Enumerator
NeutralWidth	A value used to identify characters that do not occur in legacy East Asian character sets. By extension, they also do not occur in East Asian typography.
AmbiguousWidth	All characters that can be sometimes wide and sometimes narrow. Ambiguous characters occur in East Asian legacy character sets as wide characters, but as narrow characters in non-East Asian usage. This set includes: Greek and Cyrillic alphabet symbols found in East Asian character sets; some mathematical symbols; and Private Use characters.
HalfWidth	All characters that are explicitly defined as HALF WIDTH in the Unicode Standard by having a compatibility decomposition of type <narrow> to characters elsewhere in the Unicode Standard that are implicitly wide but unmarked, plus the WON SIGN.
FullWidth	All characters that are defined as FULL WIDTH in the Unicode Standard by having a compatibility decomposition of type <wide> to characters elsewhere in the Unicode Standard that are implicitly narrow but unmarked.
NarrowWidth	All other characters that are always narrow and have explicit full-width or wide counterparts. These characters are implicitly narrow in East Asian typography and legacy character sets since they have explicit full-width or wide counterparts. The US-ASCII characters are all examples of East Asian Narrow characters.
WideWidth	All other characters that are always wide. These characters occur only in the context of East Asian typography where they are wide characters (such as the Unified Han Ideographs or Squared Katakana Symbols). This category includes characters that have explicit half-width counterparts.

enum RWUCharTraits::GeneralCategory

Enumerator
BeginGeneralCategory
Unassigned
UppercaseLetter	Lu (Normative)
LowercaseLetter	Ll (Normative)
TitlecaseLetter	Lt (Normative)
ModifierLetter	Lm (Informative)
OtherLetter	Lo (Informative)
NonSpacingMark	Mn (Normative)
EnclosingMark	Me (Normative)
CombiningSpacingMark	Mc (Normative)
DecimalDigitNumber	Nd (Normative)
LetterNumber	Nl (Normative)
OtherNumber	No (Normative)
SpaceSeparator	Zs (Normative)
LineSeparator	Zl (Normative)
ParagraphSeparator	Zp (Normative)
ControlChar	Cc (Normative)
FormatChar	Cf (Normative)
PrivateUseChar	Co (Normative)
Surrogate	Cs (Normative)
DashPunctuation	Pd (Informative)
StartPunctuation	Ps (Informative)
EndPunctuation	Pe (Informative)
ConnectorPunctuation	Pc (Informative)
OtherPunctuation	Po (Informative)
MathSymbol	Sm (Informative)
CurrencySymbol	Sc (Informative)
ModifierSymbol	Sk (Informative)
OtherSymbol	So (Informative)
InitialPunctuation	Pi (Informative)
FinalPunctuation	Pf (Informative)
GeneralOtherTypes	Cn (Normative)
EndGeneralCategory

enum RWUCharTraits::Script

Enumerator
BeginScript
Latin
Greek
Cyrillic
Armenian
Hebrew
Arabic
Syriac
Thaana
Devanagari
Bengali
Gurmukhi
Gujarati
Oriya
Tamil
Telugu
Kannada
Malayalam
Sinhala
Thai
Lao
Tibetan
Myanmar
Georgian
Hangul
Ethiopic
Cherokee
CanadianAboriginal
Ogham
Runic
Khmer
Mongolian
Hiragana
Katakana
Bopomofo
Han
Yi
OldItalic
Gothic
Deseret
Inherited
NoScript
InvalidScript
EndScript

SourcePro® API Reference Guide

Public Types

Static Public Member Functions

Detailed Description

Member Enumeration Documentation

Member Function Documentation