From 48bfe13e4feda309bcbfadf97bb8c011402f2cf8 Mon Sep 17 00:00:00 2001
From: bruvzg <7645683+bruvzg@users.noreply.github.com>
Date: Fri, 13 Sep 2024 09:19:26 +0300
Subject: [PATCH] Add methods to decode/encode multibyte encodings.

---
 core/os/os.cpp                  |  10 ++
 core/os/os.h                    |   3 +
 core/string/ustring.cpp         |  11 ++
 core/string/ustring.h           |   1 +
 core/variant/variant_call.cpp   |  12 +-
 doc/classes/PackedByteArray.xml |  10 ++
 doc/classes/String.xml          |  10 ++
 doc/classes/StringName.xml      |  10 ++
 drivers/unix/os_unix.cpp        | 124 ++++++++++++++++++++
 drivers/unix/os_unix.h          |  31 +++++
 platform/windows/os_windows.cpp | 200 ++++++++++++++++++++++++++++++++
 platform/windows/os_windows.h   |   6 +
 12 files changed, 427 insertions(+), 1 deletion(-)

diff --git a/core/os/os.cpp b/core/os/os.cpp
index c67c28de88b..385d1f44801 100644
--- a/core/os/os.cpp
+++ b/core/os/os.cpp
@@ -199,6 +199,16 @@ void OS::set_stderr_enabled(bool p_enabled) {
 	_stderr_enabled = p_enabled;
 }
 
+// Default implementation: performs no conversion and returns an empty string. Overridden by OS_Unix and OS_Windows.
+String OS::multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const {
+	return String();
+}
+
+// Default implementation: performs no conversion and returns an empty array. Overridden by OS_Unix and OS_Windows.
+PackedByteArray OS::string_to_multibyte(const String &p_encoding, const String &p_string) const {
+	return PackedByteArray();
+}
+
 int OS::get_exit_code() const {
 	return _exit_code;
 }
diff --git a/core/os/os.h b/core/os/os.h
index 49028b9cad6..51ea976d58e 100644
--- a/core/os/os.h
+++ b/core/os/os.h
@@ -265,6 +265,9 @@ public:
 	virtual void set_crash_handler_silent() { _silent_crash_handler = true; }
 	virtual bool is_crash_handler_silent() { return _silent_crash_handler; }
 
+	virtual String multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const;
+	virtual PackedByteArray string_to_multibyte(const String &p_encoding, const String &p_string) const;
+
 	virtual void disable_crash_handler() {}
 	virtual bool is_disable_crash_handler() const { return false; }
 	virtual void initialize_debugging() {}
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index ff81d329e4c..fb9687feedf 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -34,6 +34,8 @@
 #include "core/math/color.h"
 #include "core/math/math_funcs.h"
 #include "core/object/object.h"
+#include "core/os/memory.h"
+#include "core/os/os.h"
 #include "core/string/print_string.h"
 #include "core/string/string_name.h"
 #include "core/string/translation_server.h"
@@ -5987,6 +5989,15 @@ Vector<uint8_t> String::to_wchar_buffer() const {
 #endif
 }
 
+// Encodes this string with the given multibyte encoding through the OS layer.
+Vector<uint8_t> String::to_multibyte_char_buffer(const String &p_encoding) const {
+	if (is_empty()) {
+		// Nothing to encode; skip the OS call, as the decoding binding does for an empty array.
+		return Vector<uint8_t>();
+	}
+	return OS::get_singleton()->string_to_multibyte(p_encoding, *this);
+}
+
 #ifdef TOOLS_ENABLED
 /**
  * "Tools TRanslate". Performs string replacement for internationalization
diff --git a/core/string/ustring.h b/core/string/ustring.h
index a7ce37d4de1..c8b4449246d 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -618,6 +618,7 @@ public:
 	Vector<uint8_t> to_utf16_buffer() const;
 	Vector<uint8_t> to_utf32_buffer() const;
 	Vector<uint8_t> to_wchar_buffer() const;
+	Vector<uint8_t> to_multibyte_char_buffer(const String &p_encoding = String()) const;
 
 	// Constructors for NULL terminated C strings.
String(const char *p_cstr) { diff --git a/core/variant/variant_call.cpp b/core/variant/variant_call.cpp index 56253139593..ffff1f69f04 100644 --- a/core/variant/variant_call.cpp +++ b/core/variant/variant_call.cpp @@ -734,6 +734,14 @@ struct _VariantCall { return s; } + static String func_PackedByteArray_get_string_from_multibyte_char(PackedByteArray *p_instance, const String &p_encoding) { + String s; + if (p_instance->size() > 0) { + s = OS::get_singleton()->multibyte_to_string(p_encoding, *p_instance); + } + return s; + } + static PackedByteArray func_PackedByteArray_compress(PackedByteArray *p_instance, int p_mode) { PackedByteArray compressed; @@ -1815,8 +1823,9 @@ static void _register_variant_builtin_methods_string() { bind_string_method(to_utf8_buffer, sarray(), varray()); bind_string_method(to_utf16_buffer, sarray(), varray()); bind_string_method(to_utf32_buffer, sarray(), varray()); - bind_string_method(hex_decode, sarray(), varray()); bind_string_method(to_wchar_buffer, sarray(), varray()); + bind_string_method(to_multibyte_char_buffer, sarray("encoding"), varray(String())); + bind_string_method(hex_decode, sarray(), varray()); bind_static_method(String, num_scientific, sarray("number"), varray()); bind_static_method(String, num, sarray("number", "decimals"), varray(-1)); @@ -2458,6 +2467,7 @@ static void _register_variant_builtin_methods_array() { bind_function(PackedByteArray, get_string_from_utf16, _VariantCall::func_PackedByteArray_get_string_from_utf16, sarray(), varray()); bind_function(PackedByteArray, get_string_from_utf32, _VariantCall::func_PackedByteArray_get_string_from_utf32, sarray(), varray()); bind_function(PackedByteArray, get_string_from_wchar, _VariantCall::func_PackedByteArray_get_string_from_wchar, sarray(), varray()); + bind_function(PackedByteArray, get_string_from_multibyte_char, _VariantCall::func_PackedByteArray_get_string_from_multibyte_char, sarray("encoding"), varray(String())); bind_function(PackedByteArray, hex_encode, 
_VariantCall::func_PackedByteArray_hex_encode, sarray(), varray()); bind_function(PackedByteArray, compress, _VariantCall::func_PackedByteArray_compress, sarray("compression_mode"), varray(0)); bind_function(PackedByteArray, decompress, _VariantCall::func_PackedByteArray_decompress, sarray("buffer_size", "compression_mode"), varray(0)); diff --git a/doc/classes/PackedByteArray.xml b/doc/classes/PackedByteArray.xml index 552f35f3de8..5a0ba4ce082 100644 --- a/doc/classes/PackedByteArray.xml +++ b/doc/classes/PackedByteArray.xml @@ -327,6 +327,16 @@ Converts ASCII/Latin-1 encoded array to [String]. Fast alternative to [method get_string_from_utf8] if the content is ASCII/Latin-1 only. Unlike the UTF-8 function this function maps every byte to a character in the array. Multibyte sequences will not be interpreted correctly. For parsing user input always use [method get_string_from_utf8]. This is the inverse of [method String.to_ascii_buffer]. + + + + + Converts system multibyte code page encoded array to [String]. If conversion fails, empty string is returned. This is the inverse of [method String.to_multibyte_char_buffer]. + The values permitted for [param encoding] are system dependent. If [param encoding] is empty string, system default encoding is used. + - For Windows, see [url=https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers]Code Page Identifiers[/url] .NET names. + - For macOS and Linux/BSD, see [code]libiconv[/code] library documentation and [code]iconv --list[/code] for a list of supported encodings. + + diff --git a/doc/classes/String.xml b/doc/classes/String.xml index 4f659a0a14b..9001f708ea3 100644 --- a/doc/classes/String.xml +++ b/doc/classes/String.xml @@ -1030,6 +1030,16 @@ Returns the string converted to [code]lowercase[/code]. + + + + + Converts the string to system multibyte code page encoded [PackedByteArray]. If conversion fails, empty array is returned. + The values permitted for [param encoding] are system dependent. 
If [param encoding] is empty string, system default encoding is used. + - For Windows, see [url=https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers]Code Page Identifiers[/url] .NET names. + - For macOS and Linux/BSD, see [code]libiconv[/code] library documentation and [code]iconv --list[/code] for a list of supported encodings. + + diff --git a/doc/classes/StringName.xml b/doc/classes/StringName.xml index 5b8c7a62891..e0c84bd6708 100644 --- a/doc/classes/StringName.xml +++ b/doc/classes/StringName.xml @@ -938,6 +938,16 @@ Returns the string converted to [code]lowercase[/code]. + + + + + Converts the string to system multibyte code page encoded [PackedByteArray]. If conversion fails, empty array is returned. + The values permitted for [param encoding] are system dependent. If [param encoding] is empty string, system default encoding is used. + - For Windows, see [url=https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers]Code Page Identifiers[/url] .NET names. + - For macOS and Linux/BSD, see [code]libiconv[/code] library documentation and [code]iconv --list[/code] for a list of supported encodings. 
+			</description>
+		</method>
 		<method name="to_pascal_case" qualifiers="const">
 			<return type="String" />
 			<description>
diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp
index a70be9326e8..e9aac43f88e 100644
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -580,6 +580,148 @@ Dictionary OS_Unix::get_memory_info() const {
 	return meminfo;
 }
 
+#ifndef __GLIBC__
+// Looks up the iconv functions and locale_charset() with dlsym(), trying each library alias and
+// symbol-name alias in order. `_iconv_ok` ends up true only if all four entry points were found.
+void OS_Unix::_load_iconv() {
+#if defined(MACOS_ENABLED) || defined(IOS_ENABLED)
+	String iconv_lib_aliases[] = { "/usr/lib/libiconv.2.dylib" };
+	String iconv_func_aliases[] = { "iconv" };
+	String charset_lib_aliases[] = { "/usr/lib/libcharset.1.dylib" };
+#else
+	String iconv_lib_aliases[] = { "", "libiconv.2.so", "libiconv.so" };
+	String iconv_func_aliases[] = { "libiconv", "iconv", "bsd_iconv", "rpl_iconv" };
+	String charset_lib_aliases[] = { "", "libcharset.1.so", "libcharset.so" };
+#endif
+
+	for (size_t i = 0; i < sizeof(iconv_lib_aliases) / sizeof(iconv_lib_aliases[0]); i++) {
+		// An empty alias selects the RTLD_NEXT pseudo-handle instead of opening a library by name.
+		void *iconv_lib = iconv_lib_aliases[i].is_empty() ? RTLD_NEXT : dlopen(iconv_lib_aliases[i].utf8().get_data(), RTLD_NOW);
+		if (iconv_lib) {
+			for (size_t j = 0; j < sizeof(iconv_func_aliases) / sizeof(iconv_func_aliases[0]); j++) {
+				gd_iconv_open = (PIConvOpen)dlsym(iconv_lib, (iconv_func_aliases[j] + "_open").utf8().get_data());
+				gd_iconv = (PIConv)dlsym(iconv_lib, (iconv_func_aliases[j]).utf8().get_data());
+				gd_iconv_close = (PIConvClose)dlsym(iconv_lib, (iconv_func_aliases[j] + "_close").utf8().get_data());
+				if (gd_iconv_open && gd_iconv && gd_iconv_close) {
+					break;
+				}
+			}
+			if (gd_iconv_open && gd_iconv && gd_iconv_close) {
+				break;
+			}
+			if (!iconv_lib_aliases[i].is_empty()) {
+				dlclose(iconv_lib);
+			}
+		}
+	}
+
+	for (size_t i = 0; i < sizeof(charset_lib_aliases) / sizeof(charset_lib_aliases[0]); i++) {
+		void *cs_lib = charset_lib_aliases[i].is_empty() ? RTLD_NEXT : dlopen(charset_lib_aliases[i].utf8().get_data(), RTLD_NOW);
+		if (cs_lib) {
+			gd_locale_charset = (PIConvLocaleCharset)dlsym(cs_lib, "locale_charset");
+			if (gd_locale_charset) {
+				break;
+			}
+			if (!charset_lib_aliases[i].is_empty()) {
+				dlclose(cs_lib);
+			}
+		}
+	}
+	// The conversion functions below refuse to run unless every entry point was resolved.
+	_iconv_ok = gd_iconv_open && gd_iconv && gd_iconv_close && gd_locale_charset;
+}
+#endif
+
+// Converts `p_array` from `p_encoding` to UTF-8 with iconv and returns the result as a String. An empty
+// `p_encoding` selects nl_langinfo(CODESET) on glibc and locale_charset() elsewhere. Returns an empty string on error.
+String OS_Unix::multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const {
+	ERR_FAIL_COND_V_MSG(!_iconv_ok, String(), "Conversion failed: Unable to load libiconv");
+
+	LocalVector<char> chars;
+#ifdef __GLIBC__
+	gd_iconv_t ctx = gd_iconv_open("UTF-8", p_encoding.is_empty() ? nl_langinfo(CODESET) : p_encoding.utf8().get_data());
+#else
+	gd_iconv_t ctx = gd_iconv_open("UTF-8", p_encoding.is_empty() ? gd_locale_charset() : p_encoding.utf8().get_data());
+#endif
+	ERR_FAIL_COND_V_MSG(ctx == (gd_iconv_t)(-1), String(), "Conversion failed: Unknown encoding");
+
+	char *in_ptr = (char *)p_array.ptr();
+	size_t in_size = p_array.size();
+
+	// Start with one output byte per input byte; the loop below grows the buffer whenever iconv reports E2BIG.
+	chars.resize(in_size);
+	char *out_ptr = (char *)chars.ptr();
+	size_t out_size = chars.size();
+
+	while (gd_iconv(ctx, &in_ptr, &in_size, &out_ptr, &out_size) == (size_t)-1) {
+		if (errno != E2BIG) {
+			// Save errno before closing the descriptor: the close call may overwrite it.
+			const int err = errno;
+			gd_iconv_close(ctx);
+			ERR_FAIL_V_MSG(String(), vformat("Conversion failed: %d - %s", err, strerror(err)));
+		}
+		// The output buffer is full. Size the extra space from the output/input ratio seen so far.
+		// If no input has been consumed yet there is no ratio (and the division would be by zero), so assume 1:1.
+		const size_t consumed = (size_t)p_array.size() - in_size;
+		const size_t rate = consumed > 0 ? chars.size() / consumed : 1;
+		const size_t oldpos = chars.size() - out_size;
+		chars.resize(chars.size() + in_size * rate);
+		out_ptr = (char *)chars.ptr() + oldpos;
+		out_size = chars.size() - oldpos;
+	}
+	chars.resize(chars.size() - out_size);
+	gd_iconv_close(ctx);
+
+	return String::utf8((const char *)chars.ptr(), chars.size());
+}
+
+// Converts `p_string` to UTF-8 and then to `p_encoding` with iconv, returning the encoded bytes. An empty
+// `p_encoding` selects nl_langinfo(CODESET) on glibc and locale_charset() elsewhere. Returns an empty array on error.
+PackedByteArray OS_Unix::string_to_multibyte(const String &p_encoding, const String &p_string) const {
+	ERR_FAIL_COND_V_MSG(!_iconv_ok, PackedByteArray(), "Conversion failed: Unable to load libiconv");
+
+	CharString charstr = p_string.utf8();
+
+	PackedByteArray ret;
+#ifdef __GLIBC__
+	gd_iconv_t ctx = gd_iconv_open(p_encoding.is_empty() ? nl_langinfo(CODESET) : p_encoding.utf8().get_data(), "UTF-8");
+#else
+	gd_iconv_t ctx = gd_iconv_open(p_encoding.is_empty() ? gd_locale_charset() : p_encoding.utf8().get_data(), "UTF-8");
+#endif
+	ERR_FAIL_COND_V_MSG(ctx == (gd_iconv_t)(-1), PackedByteArray(), "Conversion failed: Unknown encoding");
+
+	// NOTE(review): size() is used as the input length. If CharString::size() counts the terminating NUL,
+	// the terminator is encoded into the result as well - confirm this is intended.
+	char *in_ptr = (char *)charstr.ptr();
+	size_t in_size = charstr.size();
+
+	// Start with one output byte per input byte; the loop below grows the buffer whenever iconv reports E2BIG.
+	ret.resize(in_size);
+	char *out_ptr = (char *)ret.ptrw();
+	size_t out_size = ret.size();
+
+	while (gd_iconv(ctx, &in_ptr, &in_size, &out_ptr, &out_size) == (size_t)-1) {
+		if (errno != E2BIG) {
+			// Save errno before closing the descriptor: the close call may overwrite it.
+			const int err = errno;
+			gd_iconv_close(ctx);
+			ERR_FAIL_V_MSG(PackedByteArray(), vformat("Conversion failed: %d - %s", err, strerror(err)));
+		}
+		// The output buffer is full. Size the extra space from the output/input ratio seen so far.
+		// If no input has been consumed yet there is no ratio (and the division would be by zero), so assume 1:1.
+		const size_t consumed = (size_t)charstr.size() - in_size;
+		const size_t rate = consumed > 0 ? (size_t)ret.size() / consumed : 1;
+		const size_t oldpos = ret.size() - out_size;
+		ret.resize(ret.size() + in_size * rate);
+		out_ptr = (char *)ret.ptrw() + oldpos;
+		out_size = ret.size() - oldpos;
+	}
+	ret.resize(ret.size() - out_size);
+	gd_iconv_close(ctx);
+
+	return ret;
+}
+
 Dictionary OS_Unix::execute_with_pipe(const String &p_path, const List<String> &p_arguments, bool p_blocking) {
 #define CLEAN_PIPES \
 	if (pipe_in[0] >= 0) { \
@@ -1082,6 +1224,10 @@ void UnixTerminalLogger::log_error(const char *p_function, const char *p_file, i
 UnixTerminalLogger::~UnixTerminalLogger() {}
 
 OS_Unix::OS_Unix() {
+#ifndef __GLIBC__
+	_load_iconv();
+#endif
+
 	Vector<Logger *> loggers;
 	loggers.push_back(memnew(UnixTerminalLogger));
 	_set_logger(memnew(CompositeLogger(loggers)));
diff --git a/drivers/unix/os_unix.h b/drivers/unix/os_unix.h
index fe0dd1afc46..8fde52b5ff0 100644
--- a/drivers/unix/os_unix.h
+++ b/drivers/unix/os_unix.h
@@ -35,6 +35,21 @@
 #include "core/os/os.h"
 #include "drivers/unix/ip_unix.h"
 
+#ifdef __GLIBC__
+#include <iconv.h>
+#include <langinfo.h>
+#define gd_iconv_t iconv_t
+#define gd_iconv_open iconv_open
+#define gd_iconv iconv
+#define gd_iconv_close iconv_close
+#else
+typedef void *gd_iconv_t;
+typedef gd_iconv_t (*PIConvOpen)(const char *, const char *);
+typedef size_t (*PIConv)(gd_iconv_t, char **, size_t *, char **, size_t *);
+typedef int (*PIConvClose)(gd_iconv_t);
+typedef const char *(*PIConvLocaleCharset)(void);
+#endif
+
 class OS_Unix : public OS {
 	struct ProcessInfo {
 		mutable bool is_running = true;
@@ -43,6 +58,19 @@ class OS_Unix : public OS {
 	HashMap<ProcessID, ProcessInfo> *process_map = nullptr;
 	Mutex process_map_mutex;
 
+#ifdef __GLIBC__
+	bool _iconv_ok = true;
+#else
+	bool _iconv_ok = false;
+
+	PIConvOpen gd_iconv_open = nullptr;
+	PIConv gd_iconv = nullptr;
+	PIConvClose gd_iconv_close = nullptr;
+	PIConvLocaleCharset gd_locale_charset = nullptr;
+
+	void _load_iconv();
+#endif
+
 protected:
 	// UNIX only handles the core functions.
 	// inheriting platforms under unix (eg. X11) should handle the rest
@@ -87,6 +115,9 @@ public:
 
 	virtual Dictionary get_memory_info() const override;
 
+	virtual String multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const override;
+	virtual PackedByteArray string_to_multibyte(const String &p_encoding, const String &p_string) const override;
+
 	virtual Error execute(const String &p_path, const List<String> &p_arguments, String *r_pipe = nullptr, int *r_exitcode = nullptr, bool read_stderr = false, Mutex *p_pipe_mutex = nullptr, bool p_open_console = false) override;
 	virtual Dictionary execute_with_pipe(const String &p_path, const List<String> &p_arguments, bool p_blocking = true) override;
 	virtual Error create_process(const String &p_path, const List<String> &p_arguments, ProcessID *r_child_id = nullptr, bool p_open_console = false) override;
diff --git a/platform/windows/os_windows.cpp b/platform/windows/os_windows.cpp
index a00f05944ea..fe2600d3459 100644
--- a/platform/windows/os_windows.cpp
+++ b/platform/windows/os_windows.cpp
@@ -973,6 +973,222 @@ static void _append_to_pipe(char *p_bytes, int p_size, String *r_pipe, Mutex *p_
 	}
 }
 
+// Fills the table that maps encoding names to Windows code page identifiers.
+// Lookups upper-case the requested name first, so every key must be written in upper case.
+void OS_Windows::_init_encodings() {
+	encodings[""] = 0;
+	encodings["CP_ACP"] = 0;
+	encodings["CP_OEMCP"] = 1;
+	encodings["CP_MACCP"] = 2;
+	encodings["CP_THREAD_ACP"] = 3;
+	encodings["CP_SYMBOL"] = 42;
+	encodings["IBM037"] = 37;
+	encodings["IBM437"] = 437;
+	encodings["IBM500"] = 500;
+	encodings["ASMO-708"] = 708;
+	encodings["ASMO-449"] = 709;
+	encodings["DOS-710"] = 710;
+	encodings["DOS-720"] = 720;
+	encodings["IBM737"] = 737;
+	encodings["IBM775"] = 775;
+	encodings["IBM850"] = 850;
+	encodings["IBM852"] = 852;
+	encodings["IBM855"] = 855;
+	encodings["IBM857"] = 857;
+	encodings["IBM00858"] = 858;
+	encodings["IBM860"] = 860;
+	encodings["IBM861"] = 861;
+	encodings["DOS-862"] = 862;
+	encodings["IBM863"] = 863;
+	encodings["IBM864"] = 864;
+	encodings["IBM865"] = 865;
+	encodings["CP866"] = 866;
+	encodings["IBM869"] = 869;
+	encodings["IBM870"] = 870;
+	encodings["WINDOWS-874"] = 874;
+	encodings["CP875"] = 875;
+	encodings["SHIFT_JIS"] = 932;
+	encodings["GB2312"] = 936;
+	encodings["KS_C_5601-1987"] = 949;
+	encodings["BIG5"] = 950;
+	encodings["IBM1026"] = 1026;
+	encodings["IBM01047"] = 1047;
+	encodings["IBM01140"] = 1140;
+	encodings["IBM01141"] = 1141;
+	encodings["IBM01142"] = 1142;
+	encodings["IBM01143"] = 1143;
+	encodings["IBM01144"] = 1144;
+	encodings["IBM01145"] = 1145;
+	encodings["IBM01146"] = 1146;
+	encodings["IBM01147"] = 1147;
+	encodings["IBM01148"] = 1148;
+	encodings["IBM01149"] = 1149;
+	encodings["UTF-16"] = 1200;
+	encodings["UNICODEFFFE"] = 1201;
+	encodings["WINDOWS-1250"] = 1250;
+	encodings["WINDOWS-1251"] = 1251;
+	encodings["WINDOWS-1252"] = 1252;
+	encodings["WINDOWS-1253"] = 1253;
+	encodings["WINDOWS-1254"] = 1254;
+	encodings["WINDOWS-1255"] = 1255;
+	encodings["WINDOWS-1256"] = 1256;
+	encodings["WINDOWS-1257"] = 1257;
+	encodings["WINDOWS-1258"] = 1258;
+	encodings["JOHAB"] = 1361;
+	encodings["MACINTOSH"] = 10000;
+	encodings["X-MAC-JAPANESE"] = 10001;
+	encodings["X-MAC-CHINESETRAD"] = 10002;
+	encodings["X-MAC-KOREAN"] = 10003;
+	encodings["X-MAC-ARABIC"] = 10004;
+	encodings["X-MAC-HEBREW"] = 10005;
+	encodings["X-MAC-GREEK"] = 10006;
+	encodings["X-MAC-CYRILLIC"] = 10007;
+	encodings["X-MAC-CHINESESIMP"] = 10008;
+	encodings["X-MAC-ROMANIAN"] = 10010;
+	encodings["X-MAC-UKRAINIAN"] = 10017;
+	encodings["X-MAC-THAI"] = 10021;
+	encodings["X-MAC-CE"] = 10029;
+	encodings["X-MAC-ICELANDIC"] = 10079;
+	encodings["X-MAC-TURKISH"] = 10081;
+	encodings["X-MAC-CROATIAN"] = 10082;
+	encodings["UTF-32"] = 12000;
+	encodings["UTF-32BE"] = 12001;
+	encodings["X-CHINESE_CNS"] = 20000;
+	encodings["X-CP20001"] = 20001;
+	encodings["X_CHINESE-ETEN"] = 20002;
+	encodings["X-CP20003"] = 20003;
+	encodings["X-CP20004"] = 20004;
+	encodings["X-CP20005"] = 20005;
+	encodings["X-IA5"] = 20105;
+	encodings["X-IA5-GERMAN"] = 20106;
+	encodings["X-IA5-SWEDISH"] = 20107;
+	encodings["X-IA5-NORWEGIAN"] = 20108;
+	encodings["US-ASCII"] = 20127;
+	encodings["X-CP20261"] = 20261;
+	encodings["X-CP20269"] = 20269;
+	encodings["IBM273"] = 20273;
+	encodings["IBM277"] = 20277;
+	encodings["IBM278"] = 20278;
+	encodings["IBM280"] = 20280;
+	encodings["IBM284"] = 20284;
+	encodings["IBM285"] = 20285;
+	encodings["IBM290"] = 20290;
+	encodings["IBM297"] = 20297;
+	encodings["IBM420"] = 20420;
+	encodings["IBM423"] = 20423;
+	encodings["IBM424"] = 20424;
+	encodings["X-EBCDIC-KOREANEXTENDED"] = 20833;
+	encodings["IBM-THAI"] = 20838;
+	encodings["KOI8-R"] = 20866;
+	encodings["IBM871"] = 20871;
+	encodings["IBM880"] = 20880;
+	encodings["IBM905"] = 20905;
+	encodings["IBM00924"] = 20924;
+	encodings["X-CP20936"] = 20936;
+	encodings["X-CP20949"] = 20949;
+	encodings["CP1025"] = 21025;
+	encodings["KOI8-U"] = 21866;
+	encodings["ISO-8859-1"] = 28591;
+	encodings["ISO-8859-2"] = 28592;
+	encodings["ISO-8859-3"] = 28593;
+	encodings["ISO-8859-4"] = 28594;
+	encodings["ISO-8859-5"] = 28595;
+	encodings["ISO-8859-6"] = 28596;
+	encodings["ISO-8859-7"] = 28597;
+	encodings["ISO-8859-8"] = 28598;
+	encodings["ISO-8859-9"] = 28599;
+	encodings["ISO-8859-13"] = 28603;
+	encodings["ISO-8859-15"] = 28605;
+	encodings["X-EUROPA"] = 29001;
+	encodings["ISO-8859-8-I"] = 38598;
+	// Code pages 50220 and 50222 share the name "ISO-2022-JP"; a key holds one value, so the name resolves to 50220.
+	encodings["ISO-2022-JP"] = 50220;
+	encodings["CSISO2022JP"] = 50221;
+	encodings["ISO-2022-KR"] = 50225;
+	encodings["X-CP50227"] = 50227;
+	encodings["EBCDIC-JP"] = 50930;
+	encodings["EBCDIC-US-JP"] = 50931;
+	encodings["EBCDIC-KR"] = 50933;
+	encodings["EBCDIC-CN-EXT"] = 50935;
+	encodings["EBCDIC-CN"] = 50936;
+	encodings["EBCDIC-US-CN"] = 50937;
+	encodings["EBCDIC-JP-EXT"] = 50939;
+	// Code pages 20932 and 51932 share the name "EUC-JP"; a key holds one value, so the name resolves to 51932.
+	encodings["EUC-JP"] = 51932;
+	encodings["EUC-CN"] = 51936;
+	encodings["EUC-KR"] = 51949;
+	encodings["HZ-GB-2312"] = 52936;
+	encodings["GB18030"] = 54936;
+	encodings["X-ISCII-DE"] = 57002;
+	encodings["X-ISCII-BE"] = 57003;
+	encodings["X-ISCII-TA"] = 57004;
+	encodings["X-ISCII-TE"] = 57005;
+	encodings["X-ISCII-AS"] = 57006;
+	encodings["X-ISCII-OR"] = 57007;
+	encodings["X-ISCII-KA"] = 57008;
+	encodings["X-ISCII-MA"] = 57009;
+	encodings["X-ISCII-GU"] = 57010;
+	encodings["X-ISCII-PA"] = 57011;
+	encodings["UTF-7"] = 65000;
+	encodings["UTF-8"] = 65001;
+}
+
+// Decodes `p_array` from the code page registered for `p_encoding` (matched case-insensitively; an empty
+// name maps to code page 0, the same value as CP_ACP). Returns an empty string and logs an error on failure.
+String OS_Windows::multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const {
+	const int *encoding = encodings.getptr(p_encoding.to_upper());
+	ERR_FAIL_NULL_V_MSG(encoding, String(), "Conversion failed: Unknown encoding");
+
+	// MultiByteToWideChar() fails when given a zero-length input; an empty array simply decodes to an empty string.
+	if (p_array.size() == 0) {
+		return String();
+	}
+
+	LocalVector<wchar_t> wchars;
+	int total_wchars = MultiByteToWideChar(*encoding, 0, (const char *)p_array.ptr(), p_array.size(), nullptr, 0);
+	if (total_wchars == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(String(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+	wchars.resize(total_wchars);
+	if (MultiByteToWideChar(*encoding, 0, (const char *)p_array.ptr(), p_array.size(), wchars.ptr(), total_wchars) == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(String(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+
+	return String::utf16((const char16_t *)wchars.ptr(), wchars.size());
+}
+
+// Encodes `p_string` into the code page registered for `p_encoding` (matched case-insensitively; an empty
+// name maps to code page 0, the same value as CP_ACP). Returns an empty array and logs an error on failure.
+PackedByteArray OS_Windows::string_to_multibyte(const String &p_encoding, const String &p_string) const {
+	const int *encoding = encodings.getptr(p_encoding.to_upper());
+	ERR_FAIL_NULL_V_MSG(encoding, PackedByteArray(), "Conversion failed: Unknown encoding");
+
+	// WideCharToMultiByte() fails when given a zero-length input; an empty string simply encodes to an empty array.
+	if (p_string.is_empty()) {
+		return PackedByteArray();
+	}
+
+	// NOTE(review): size() is passed as the character count. If Char16String::size() counts the terminating NUL,
+	// the terminator is encoded into the result as well - confirm this is intended.
+	Char16String charstr = p_string.utf16();
+	PackedByteArray ret;
+	int total_mbchars = WideCharToMultiByte(*encoding, 0, (const wchar_t *)charstr.ptr(), charstr.size(), nullptr, 0, nullptr, nullptr);
+	if (total_mbchars == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(PackedByteArray(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+
+	ret.resize(total_mbchars);
+	if (WideCharToMultiByte(*encoding, 0, (const wchar_t *)charstr.ptr(), charstr.size(), (char *)ret.ptrw(), ret.size(), nullptr, nullptr) == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(PackedByteArray(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+
+	return ret;
+}
+
 Dictionary OS_Windows::get_memory_info() const {
 	Dictionary meminfo;
 
@@ -2463,6 +2679,8 @@ bool OS_Windows::_test_create_rendering_device_and_gl(const String &p_display_dr
 OS_Windows::OS_Windows(HINSTANCE _hInstance) {
 	hInstance = _hInstance;
 
+	_init_encodings();
+
 	// Reset CWD to ensure long path is used.
Char16String current_dir_name; size_t str_len = GetCurrentDirectoryW(0, nullptr); diff --git a/platform/windows/os_windows.h b/platform/windows/os_windows.h index 0a0e6fb73da..a7df3e07bb2 100644 --- a/platform/windows/os_windows.h +++ b/platform/windows/os_windows.h @@ -136,6 +136,9 @@ class OS_Windows : public OS { bool is_using_con_wrapper() const; + HashMap encodings; + void _init_encodings(); + // functions used by main to initialize/deinitialize the OS protected: virtual void initialize() override; @@ -241,6 +244,9 @@ public: virtual bool _check_internal_feature_support(const String &p_feature) override; + virtual String multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const override; + virtual PackedByteArray string_to_multibyte(const String &p_encoding, const String &p_string) const override; + virtual void disable_crash_handler() override; virtual bool is_disable_crash_handler() const override; virtual void initialize_debugging() override;