libfly  6.2.2
C++20 utility library for Linux, macOS, and Windows
fly::detail::BasicUnicode< CharType > Class Template Reference

#include <unicode.hpp>

Public Member Functions

template<typename IteratorType >
auto decode_codepoint (IteratorType &it, const IteratorType &end) -> std::optional< codepoint_type >
 
template<char UnicodePrefix, typename IteratorType >
requires fly::UnicodePrefixCharacter< UnicodePrefix > auto escape_codepoint (IteratorType &it, const IteratorType &end) -> std::optional< string_type >
 
template<typename IteratorType >
auto unescape_codepoint (IteratorType &it, const IteratorType &end) -> std::optional< string_type >
 
template<char UnicodePrefix>
requires fly::UnicodePrefixCharacter< UnicodePrefix > auto escape_codepoint (codepoint_type codepoint) -> string_type
 
template<char UnicodePrefix, typename IteratorType >
requires fly::UnicodePrefixCharacter< UnicodePrefix > auto unescape_codepoint (IteratorType &it, const IteratorType &end) -> codepoint_type
 
template<typename IteratorType >
requires fly::SizeOfTypeIs< CharType, 1 > auto codepoint_from_string (IteratorType &it, const IteratorType &end) -> codepoint_type
 
template<typename IteratorType >
requires fly::SizeOfTypeIs< CharType, 2 > auto codepoint_from_string (IteratorType &it, const IteratorType &end) -> codepoint_type
 
template<typename IteratorType >
requires fly::SizeOfTypeIs< CharType, 4 > auto codepoint_from_string (IteratorType &it, const IteratorType &end) -> codepoint_type
 
template<typename OutputIteratorType >
requires fly::SizeOfTypeIs< CharType, 1 > void codepoint_to_string (codepoint_type codepoint, OutputIteratorType out)
 
template<typename OutputIteratorType >
requires fly::SizeOfTypeIs< CharType, 2 > void codepoint_to_string (codepoint_type codepoint, OutputIteratorType out)
 
template<typename OutputIteratorType >
requires fly::SizeOfTypeIs< CharType, 4 > void codepoint_to_string (codepoint_type codepoint, OutputIteratorType out)
 
template<typename IteratorType >
auto next_encoded_byte (IteratorType &it, const IteratorType &end) -> codepoint_type
 

Static Public Member Functions

template<typename IteratorType >
static bool validate_encoding (IteratorType &it, const IteratorType &end)
 
template<typename DesiredStringType >
static std::optional< DesiredStringType > convert_encoding (view_type value)
 
template<typename DesiredStringType , typename OutputIteratorType >
static bool convert_encoding_into (view_type value, OutputIteratorType out)
 
template<typename IteratorType >
static std::optional< codepoint_type > decode_codepoint (IteratorType &it, const IteratorType &end)
 
static std::optional< string_type > encode_codepoint (codepoint_type codepoint)
 
template<char UnicodePrefix = 'U', typename IteratorType >
requires static fly::UnicodePrefixCharacter< UnicodePrefix > std::optional< string_type > escape_codepoint (IteratorType &it, const IteratorType &end)
 
template<typename IteratorType >
static std::optional< string_type > unescape_codepoint (IteratorType &it, const IteratorType &end)
 

Detailed Description

template<fly::StandardCharacter CharType>
class fly::detail::BasicUnicode< CharType >

Helper class for decoding and encoding Unicode codepoints in a std::basic_string. The assumed Unicode encoding depends on the character type template parameter:

1. char - UTF-8
2. wchar_t - UTF-16 on Windows, UTF-32 on Linux and macOS
3. char8_t - UTF-8
4. char16_t - UTF-16
5. char32_t - UTF-32
Author
Timothy Flynn (trfly.nosp@m.nn89.nosp@m.@pm.m.nosp@m.e)
Version
June 6, 2020

Member Function Documentation

◆ convert_encoding()

template<fly::StandardCharacter CharType>
template<typename DesiredStringType >
std::optional< DesiredStringType > fly::detail::BasicUnicode< CharType >::convert_encoding ( view_type  value)
inline static

Convert the Unicode encoding of a string to another encoding.

Template Parameters
DesiredStringTypeThe type of string to convert to.
Parameters
valueThe encoded Unicode string to convert.
Returns
If successful, a copy of the source string with the desired encoding. Otherwise, an uninitialized value.

◆ convert_encoding_into()

template<fly::StandardCharacter CharType>
template<typename DesiredStringType , typename OutputIteratorType >
bool fly::detail::BasicUnicode< CharType >::convert_encoding_into ( view_type  value,
OutputIteratorType  out 
)
static

Convert the Unicode encoding of a string to another encoding, inserting the result into the provided output iterator.

Template Parameters
DesiredStringTypeThe type of string to convert to.
OutputIteratorTypeThe type of the output iterator to insert the result into.
Parameters
valueThe encoded Unicode string to convert.
outThe output iterator to insert the result into.
Returns
Whether the conversion was successful.

◆ decode_codepoint()

template<fly::StandardCharacter CharType>
template<typename IteratorType >
static std::optional<codepoint_type> fly::detail::BasicUnicode< CharType >::decode_codepoint ( IteratorType &  it,
const IteratorType &  end 
)
static

Decode a single Unicode codepoint, starting at the character pointed to by the provided iterator. If successful, after invoking this method, that iterator will point at the first character after the Unicode codepoint in the source string.

Template Parameters
IteratorTypeThe type of the encoded Unicode codepoint's iterator.
Parameters
itPointer to the beginning of the encoded Unicode codepoint.
endPointer to the end of the encoded Unicode codepoint.
Returns
If successful, the decoded Unicode codepoint. Otherwise, an uninitialized value.

◆ encode_codepoint()

template<fly::StandardCharacter CharType>
auto fly::detail::BasicUnicode< CharType >::encode_codepoint ( codepoint_type  codepoint)
static

Encode a single Unicode codepoint.

Parameters
codepointThe Unicode codepoint to encode.
Returns
If successful, a string containing the encoded Unicode codepoint. Otherwise, an uninitialized value.

◆ escape_codepoint()

template<fly::StandardCharacter CharType>
template<char UnicodePrefix = 'U', typename IteratorType >
requires static fly::UnicodePrefixCharacter<UnicodePrefix> std::optional<string_type> fly::detail::BasicUnicode< CharType >::escape_codepoint ( IteratorType &  it,
const IteratorType &  end 
)
static

Escape a single Unicode codepoint, starting at the character pointed to by the provided iterator. If successful, after invoking this method, that iterator will point at the first character after the Unicode codepoint in the source string.

If the Unicode codepoint is an ASCII, non-control character (i.e. codepoints in the range [U+0020, U+007E]), that character is not escaped.

If the Unicode codepoint is non-ASCII or a control character (i.e. codepoints in the range [U+0000, U+001F] or [U+007F, U+10FFFF]), the codepoint is encoded as follows, taking into consideration the provided Unicode prefix character:

1. If the Unicode codepoint is in the range [U+0000, U+001F] or [U+007F, U+FFFF],
   regardless of the prefix character, the encoding will be of the form \unnnn.
2. If the codepoint is in the range [U+10000, U+10FFFF], and the prefix character is 'u',
   the encoding will be a surrogate pair of the form \unnnn\unnnn.
3. If the codepoint is in the range [U+10000, U+10FFFF], and the prefix character is 'U',
   the encoding will be of the form \Unnnnnnnn.
Template Parameters
UnicodePrefixThe Unicode prefix character ('u' or 'U').
IteratorTypeThe type of the encoded Unicode codepoint's iterator.
Parameters
itPointer to the beginning of the encoded Unicode codepoint.
endPointer to the end of the encoded Unicode codepoint.
Returns
If successful, a string containing the escaped Unicode codepoint. Otherwise, an uninitialized value.

◆ unescape_codepoint()

template<fly::StandardCharacter CharType>
template<typename IteratorType >
static std::optional<string_type> fly::detail::BasicUnicode< CharType >::unescape_codepoint ( IteratorType &  it,
const IteratorType &  end 
)
static

Unescape a single Unicode codepoint, starting at the character pointed to by the provided iterator. If successful, after invoking this method, that iterator will point at the first character after the escaped sequence in the source string.

Accepts escaped sequences of the following forms:

1. \unnnn for Unicode codepoints in the range [U+0000, U+FFFF].
2. \unnnn\unnnn surrogate pairs for Unicode codepoints in the range [U+10000, U+10FFFF].
3. \Unnnnnnnn for all Unicode codepoints.
Template Parameters
IteratorTypeThe type of the escaped Unicode string's iterator.
Parameters
itPointer to the beginning of the escaped character sequence.
endPointer to the end of the escaped character sequence.
Returns
If successful, a string containing the unescaped Unicode codepoint. Otherwise, an uninitialized value.

◆ validate_encoding()

template<fly::StandardCharacter CharType>
template<typename IteratorType >
bool fly::detail::BasicUnicode< CharType >::validate_encoding ( IteratorType &  it,
const IteratorType &  end 
)
static

Validate that a string is strictly Unicode compliant.

Template Parameters
IteratorTypeThe type of the encoded Unicode string's iterator.
Parameters
itPointer to the beginning of the encoded Unicode string.
endPointer to the end of the encoded Unicode string.
Returns
True if the string is Unicode compliant.

The documentation for this class was generated from the following file: