|
template<typename IteratorType > |
auto | decode_codepoint (IteratorType &it, const IteratorType &end) -> std::optional< codepoint_type > |
|
template<char UnicodePrefix, typename IteratorType > |
requires fly::UnicodePrefixCharacter< UnicodePrefix > auto | escape_codepoint (IteratorType &it, const IteratorType &end) -> std::optional< string_type > |
|
template<typename IteratorType > |
auto | unescape_codepoint (IteratorType &it, const IteratorType &end) -> std::optional< string_type > |
|
template<char UnicodePrefix> |
requires fly::UnicodePrefixCharacter< UnicodePrefix > auto | escape_codepoint (codepoint_type codepoint) -> string_type |
|
template<char UnicodePrefix, typename IteratorType > |
requires fly::UnicodePrefixCharacter< UnicodePrefix > auto | unescape_codepoint (IteratorType &it, const IteratorType &end) -> codepoint_type |
|
template<typename IteratorType > |
requires fly::SizeOfTypeIs< CharType, 1 > auto | codepoint_from_string (IteratorType &it, const IteratorType &end) -> codepoint_type |
|
template<typename IteratorType > |
requires fly::SizeOfTypeIs< CharType, 2 > auto | codepoint_from_string (IteratorType &it, const IteratorType &end) -> codepoint_type |
|
template<typename IteratorType > |
requires fly::SizeOfTypeIs< CharType, 4 > auto | codepoint_from_string (IteratorType &it, const IteratorType &end) -> codepoint_type |
|
template<typename OutputIteratorType > |
requires fly::SizeOfTypeIs< CharType, 1 > void | codepoint_to_string (codepoint_type codepoint, OutputIteratorType out) |
|
template<typename OutputIteratorType > |
requires fly::SizeOfTypeIs< CharType, 2 > void | codepoint_to_string (codepoint_type codepoint, OutputIteratorType out) |
|
template<typename OutputIteratorType > |
requires fly::SizeOfTypeIs< CharType, 4 > void | codepoint_to_string (codepoint_type codepoint, OutputIteratorType out) |
|
template<typename IteratorType > |
auto | next_encoded_byte (IteratorType &it, const IteratorType &end) -> codepoint_type |
|
|
template<typename IteratorType > |
static bool | validate_encoding (IteratorType &it, const IteratorType &end) |
|
template<typename DesiredStringType > |
static std::optional< DesiredStringType > | convert_encoding (view_type value) |
|
template<typename DesiredStringType , typename OutputIteratorType > |
static bool | convert_encoding_into (view_type value, OutputIteratorType out) |
|
template<typename IteratorType > |
static std::optional< codepoint_type > | decode_codepoint (IteratorType &it, const IteratorType &end) |
|
static std::optional< string_type > | encode_codepoint (codepoint_type codepoint) |
|
template<char UnicodePrefix = 'U', typename IteratorType > |
requires static fly::UnicodePrefixCharacter< UnicodePrefix > std::optional< string_type > | escape_codepoint (IteratorType &it, const IteratorType &end) |
|
template<typename IteratorType > |
static std::optional< string_type > | unescape_codepoint (IteratorType &it, const IteratorType &end) |
|
template<fly::StandardCharacter CharType>
class fly::detail::BasicUnicode< CharType >
Helper class for decoding and encoding Unicode codepoints in a std::basic_string. The assumed Unicode encoding depends on the character type provided as the template parameter:
1. char - UTF-8
2. wchar_t - UTF-16 on Windows, UTF-32 on Linux and macOS
3. char8_t - UTF-8
4. char16_t - UTF-16
5. char32_t - UTF-32
- Author
- Timothy Flynn (trfly.nosp@m.nn89.nosp@m.@pm.m.nosp@m.e)
- Version
- June 6, 2020
template<fly::StandardCharacter CharType>
template<typename IteratorType >
static std::optional<codepoint_type> fly::detail::BasicUnicode< CharType >::decode_codepoint |
( |
IteratorType & |
it, |
|
|
const IteratorType & |
end |
|
) |
| |
|
static |
Decode a single Unicode codepoint, starting at the character pointed to by the provided iterator. If successful, after invoking this method, that iterator will point at the first character after the Unicode codepoint in the source string.
- Template Parameters
-
IteratorType | The type of the encoded Unicode codepoint's iterator. |
- Parameters
-
it | Pointer to the beginning of the encoded Unicode codepoint. |
end | Pointer to the end of the encoded Unicode codepoint. |
- Returns
- If successful, the decoded Unicode codepoint. Otherwise, an uninitialized value.
template<fly::StandardCharacter CharType>
template<char UnicodePrefix = 'U', typename IteratorType >
requires static fly::UnicodePrefixCharacter<UnicodePrefix> std::optional<string_type> fly::detail::BasicUnicode< CharType >::escape_codepoint |
( |
IteratorType & |
it, |
|
|
const IteratorType & |
end |
|
) |
| |
|
static |
Escape a single Unicode codepoint, starting at the character pointed to by the provided iterator. If successful, after invoking this method, that iterator will point at the first character after the Unicode codepoint in the source string.
If the Unicode codepoint is an ASCII, non-control character (i.e. codepoints in the range [U+0020, U+007E]), that character is not escaped.
If the Unicode codepoint is non-ASCII or a control character (i.e. codepoints in the range [U+0000, U+001F] or [U+007F, U+10FFFF]), the codepoint is encoded as follows, taking into consideration the provided Unicode prefix character:
1. If the Unicode codepoint is in the range [U+0000, U+001F] or [U+007F, U+FFFF],
regardless of the prefix character, the encoding will be of the form \unnnn.
2. If the codepoint is in the range [U+10000, U+10FFFF], and the prefix character is 'u',
the encoding will be a surrogate pair of the form \unnnn\unnnn.
3. If the codepoint is in the range [U+10000, U+10FFFF], and the prefix character is 'U',
the encoding will be of the form \Unnnnnnnn.
- Template Parameters
-
UnicodePrefix | The Unicode prefix character ('u' or 'U'). |
IteratorType | The type of the encoded Unicode codepoint's iterator. |
- Parameters
-
it | Pointer to the beginning of the encoded Unicode codepoint. |
end | Pointer to the end of the encoded Unicode codepoint. |
- Returns
- If successful, a string containing the escaped Unicode codepoint. Otherwise, an uninitialized value.
template<fly::StandardCharacter CharType>
template<typename IteratorType >
static std::optional<string_type> fly::detail::BasicUnicode< CharType >::unescape_codepoint |
( |
IteratorType & |
it, |
|
|
const IteratorType & |
end |
|
) |
| |
|
static |
Unescape a single Unicode codepoint, starting at the character pointed to by the provided iterator. If successful, after invoking this method, that iterator will point at the first character after the escaped sequence in the source string.
Accepts escaped sequences of the following forms:
1. \unnnn for Unicode codepoints in the range [U+0000, U+FFFF].
2. \unnnn\unnnn surrogate pairs for Unicode codepoints in the range [U+10000, U+10FFFF].
3. \Unnnnnnnn for all Unicode codepoints.
- Template Parameters
-
IteratorType | The type of the escaped Unicode string's iterator. |
- Parameters
-
it | Pointer to the beginning of the escaped character sequence. |
end | Pointer to the end of the escaped character sequence. |
- Returns
- If successful, a string containing the unescaped Unicode codepoint. Otherwise, an uninitialized value.