std::codecvt_utf8_utf16

Header: <codecvt>

std::codecvt_utf8_utf16 is a std::codecvt facet which encapsulates conversion between a UTF-8 encoded byte string and UTF-16 encoded character string. If Elem is a 32-bit type, one UTF-16 code unit will be stored in each 32-bit character of the output sequence.

# Declarations

template<
class Elem,
unsigned long Maxcode = 0x10ffff,
std::codecvt_mode Mode = (std::codecvt_mode)0 >
class codecvt_utf8_utf16
: public std::codecvt<Elem, char, std::mbstate_t>;

(since C++11) (deprecated in C++17) (removed in C++26)

# Parameters

refs: the number of references that link to the facet

# Example

#include <cassert>
#include <codecvt>
#include <cstdint>
#include <iostream>
#include <locale>
#include <string>
 
int main()
{
    std::string u8 = "z\u00df\u6c34\U0001f34c";
    std::u16string u16 = u"z\u00df\u6c34\U0001f34c";
 
    // UTF-8 to UTF-16/char16_t
    std::u16string u16_conv = std::wstring_convert<
        std::codecvt_utf8_utf16<char16_t>, char16_t>{}.from_bytes(u8);
    assert(u16 == u16_conv);
    std::cout << "UTF-8 to UTF-16 conversion produced " << u16_conv.size()
              << " code units:\n" << std::showbase << std::hex;
    for (char16_t c : u16_conv)
        std::cout << static_cast<std::uint16_t>(c) << ' ';
 
    // UTF-16/char16_t to UTF-8
    std::string u8_conv = std::wstring_convert<
        std::codecvt_utf8_utf16<char16_t>, char16_t>{}.to_bytes(u16);
    assert(u8 == u8_conv);
    std::cout << "\nUTF-16 to UTF-8 conversion produced "
              << std::dec << u8_conv.size() << " bytes:\n" << std::hex;
    for (char c : u8_conv)
        std::cout << +static_cast<unsigned char>(c) << ' ';
    std::cout << '\n';
}

# Defect reports

DR	Applied to	Behavior as published	Correct behavior
LWG 2229	C++98	the constructor and destructor were not specified	specifies them