Unicode conversion does not support multibyte languages.

in Bug Reports
For the code below, the assert fails.
// Repro: round-trip a narrow string through Ice's string <-> wstring helpers.
// NOTE(review): the non-ASCII prefix of the literal looks like multibyte (MBCS)
// text rendered in the wrong encoding -- confirm against the original post.
std::string ss = "´ïÃ÷Ò»ÅÅ=damingyipai";
std::wstring ws = Ice::stringToWstring( ss );
std::string ss2 = Ice::wstringToString( ws );
// Reported to fail: the round-tripped string does not equal the original.
assert( ss2 == "´ïÃ÷Ò»ÅÅ=damingyipai" );
I think it would work well if Ice called the Windows API directly:
namespace jf
{
namespace unicode
{
#if ( defined( WIN32) || defined(WIN64) )
// Returns the number of wide characters, including the terminating null,
// needed to hold srcText once converted from the system ANSI code page.
// Returns 0 when srcText is null or when the conversion is rejected
// (for example, an invalid multibyte sequence).
unsigned int calcRequiredSize(const char* const srcText)
{
    if ( ! srcText ) {
        return 0;
    }

    // A zero-length output buffer asks MultiByteToWideChar for the required
    // size only. MB_PRECOMPOSED is deliberately not passed: it is already the
    // default, and passing it explicitly makes the call fail with
    // ERROR_INVALID_FLAGS when the active code page is UTF-8.
    const int count = MultiByteToWideChar( GetACP(),
                                           MB_ERR_INVALID_CHARS,
                                           srcText,
                                           -1,
                                           NULL,
                                           0 );

    // The API signals failure with 0 (never -1), so one check covers it.
    if ( count <= 0 ) {
        return 0;
    }
    return static_cast<unsigned int>(count);
}
// Returns the number of bytes, including the terminating null, needed to hold
// srcText once converted to the system ANSI code page.
// Returns 0 when srcText is null or when the conversion fails.
unsigned int calcRequiredSize(const wchar_t* const srcText)
{
    if (!srcText)
        return 0;

    // A zero-length output buffer asks WideCharToMultiByte for the required
    // size only. lpUsedDefaultChar is NULL because the flag was never read,
    // and a non-NULL pointer makes the call fail with ERROR_INVALID_PARAMETER
    // when the active code page is UTF-8.
    const int count = WideCharToMultiByte( GetACP(),
                                           0,
                                           srcText,
                                           -1,
                                           NULL,
                                           0,
                                           NULL,
                                           NULL );

    // The API signals failure with 0 (never -1).
    if (count <= 0)
        return 0;
    return static_cast<unsigned int>(count);
}
// Converts a null-terminated wide string to the system ANSI code page.
//
// Returns a buffer allocated with new[] that the caller must release with
// delete[]. An empty input yields an empty string. Returns 0 when toTranscode
// is null or when the conversion fails.
char* transcode(const wchar_t* const toTranscode)
{
    if (!toTranscode)
        return 0;

    if (!*toTranscode)
    {
        char* emptyText = new char[1];
        emptyText[0] = 0;
        return emptyText;
    }

    // Size in bytes including the terminating null; 0 means failure.
    const unsigned int byteCount = calcRequiredSize( toTranscode );
    if (0 == byteCount)
        return 0;

    char* retVal = new char[byteCount];

    // lpUsedDefaultChar is NULL: the flag was never consulted, and a non-NULL
    // pointer makes the call fail when the active code page is UTF-8.
    const int written = WideCharToMultiByte( GetACP(),
                                             0,
                                             toTranscode,
                                             -1,
                                             retVal,
                                             static_cast<int>(byteCount),
                                             NULL,
                                             NULL );
    if (written <= 0)
    {
        // Do not hand back a buffer whose contents were never written.
        delete [] retVal;
        return 0;
    }

    // Cap it off anyway just to make sure.
    retVal[byteCount - 1] = 0;
    return retVal;
}
// Converts a null-terminated string in the system ANSI code page to a wide
// string.
//
// Returns a buffer allocated with new[] that the caller must release with
// delete[]. Returns 0 only when toTranscode is null; an empty input, or input
// that cannot be converted, yields an empty wide string.
wchar_t* transcode(const char* const toTranscode)
{
    if (!toTranscode)
        return 0;

    // Size in wide characters including the terminating null; it is 0 when
    // the size query fails. An empty input skips the query entirely.
    const unsigned int neededLen = *toTranscode ? calcRequiredSize(toTranscode) : 0;
    if (neededLen != 0)
    {
        wchar_t* retVal = new wchar_t[neededLen];

        // Flags are 0: precomposed output is already the default, and passing
        // MB_PRECOMPOSED explicitly makes the call fail with
        // ERROR_INVALID_FLAGS when the active code page is UTF-8.
        const int written = MultiByteToWideChar( GetACP(),
                                                 0,
                                                 toTranscode,
                                                 -1,
                                                 retVal,
                                                 static_cast<int>(neededLen) );
        if (written > 0)
        {
            // Cap it off just to make sure.
            retVal[neededLen - 1] = 0;
            return retVal;
        }

        // Conversion failed: drop the unwritten buffer and fall through to
        // the empty-string result, matching the size-failure path.
        delete [] retVal;
    }

    wchar_t* emptyText = new wchar_t[1];
    emptyText[0] = 0;
    return emptyText;
}
#else
#error other platform
#endif
} // namespace unicode
thanks
// Repro: round-trip a narrow string through Ice's string <-> wstring helpers.
// NOTE(review): the non-ASCII prefix of the literal looks like multibyte (MBCS)
// text rendered in the wrong encoding -- confirm against the original post.
std::string ss = "´ïÃ÷Ò»ÅÅ=damingyipai";
std::wstring ws = Ice::stringToWstring( ss );
std::string ss2 = Ice::wstringToString( ws );
// The post reports that this assertion fails.
assert( ss2 == "´ïÃ÷Ò»ÅÅ=damingyipai" );
I think it would work well if Ice called the Windows API directly:
namespace jf
{
namespace unicode
{
#if ( defined( WIN32) || defined(WIN64) )
// Returns the number of wide characters, including the terminating null,
// needed to hold srcText once converted from the system ANSI code page.
// Returns 0 when srcText is null or when the conversion is rejected
// (for example, an invalid multibyte sequence).
unsigned int calcRequiredSize(const char* const srcText)
{
    if ( ! srcText ) {
        return 0;
    }

    // A zero-length output buffer asks MultiByteToWideChar for the required
    // size only. MB_PRECOMPOSED is deliberately not passed: it is already the
    // default, and passing it explicitly makes the call fail with
    // ERROR_INVALID_FLAGS when the active code page is UTF-8.
    const int count = MultiByteToWideChar( GetACP(),
                                           MB_ERR_INVALID_CHARS,
                                           srcText,
                                           -1,
                                           NULL,
                                           0 );

    // The API signals failure with 0 (never -1), so one check covers it.
    if ( count <= 0 ) {
        return 0;
    }
    return static_cast<unsigned int>(count);
}
// Returns the number of bytes, including the terminating null, needed to hold
// srcText once converted to the system ANSI code page.
// Returns 0 when srcText is null or when the conversion fails.
unsigned int calcRequiredSize(const wchar_t* const srcText)
{
    if (!srcText)
        return 0;

    // A zero-length output buffer asks WideCharToMultiByte for the required
    // size only. lpUsedDefaultChar is NULL because the flag was never read,
    // and a non-NULL pointer makes the call fail with ERROR_INVALID_PARAMETER
    // when the active code page is UTF-8.
    const int count = WideCharToMultiByte( GetACP(),
                                           0,
                                           srcText,
                                           -1,
                                           NULL,
                                           0,
                                           NULL,
                                           NULL );

    // The API signals failure with 0 (never -1).
    if (count <= 0)
        return 0;
    return static_cast<unsigned int>(count);
}
// Converts a null-terminated wide string to the system ANSI code page.
//
// Returns a buffer allocated with new[] that the caller must release with
// delete[]. An empty input yields an empty string. Returns 0 when toTranscode
// is null or when the conversion fails.
char* transcode(const wchar_t* const toTranscode)
{
    if (!toTranscode)
        return 0;

    if (!*toTranscode)
    {
        char* emptyText = new char[1];
        emptyText[0] = 0;
        return emptyText;
    }

    // Size in bytes including the terminating null; 0 means failure.
    const unsigned int byteCount = calcRequiredSize( toTranscode );
    if (0 == byteCount)
        return 0;

    char* retVal = new char[byteCount];

    // lpUsedDefaultChar is NULL: the flag was never consulted, and a non-NULL
    // pointer makes the call fail when the active code page is UTF-8.
    const int written = WideCharToMultiByte( GetACP(),
                                             0,
                                             toTranscode,
                                             -1,
                                             retVal,
                                             static_cast<int>(byteCount),
                                             NULL,
                                             NULL );
    if (written <= 0)
    {
        // Do not hand back a buffer whose contents were never written.
        delete [] retVal;
        return 0;
    }

    // Cap it off anyway just to make sure.
    retVal[byteCount - 1] = 0;
    return retVal;
}
// Converts a null-terminated string in the system ANSI code page to a wide
// string.
//
// Returns a buffer allocated with new[] that the caller must release with
// delete[]. Returns 0 only when toTranscode is null; an empty input, or input
// that cannot be converted, yields an empty wide string.
wchar_t* transcode(const char* const toTranscode)
{
    if (!toTranscode)
        return 0;

    // Size in wide characters including the terminating null; it is 0 when
    // the size query fails. An empty input skips the query entirely.
    const unsigned int neededLen = *toTranscode ? calcRequiredSize(toTranscode) : 0;
    if (neededLen != 0)
    {
        wchar_t* retVal = new wchar_t[neededLen];

        // Flags are 0: precomposed output is already the default, and passing
        // MB_PRECOMPOSED explicitly makes the call fail with
        // ERROR_INVALID_FLAGS when the active code page is UTF-8.
        const int written = MultiByteToWideChar( GetACP(),
                                                 0,
                                                 toTranscode,
                                                 -1,
                                                 retVal,
                                                 static_cast<int>(neededLen) );
        if (written > 0)
        {
            // Cap it off just to make sure.
            retVal[neededLen - 1] = 0;
            return retVal;
        }

        // Conversion failed: drop the unwritten buffer and fall through to
        // the empty-string result, matching the size-failure path.
        delete [] retVal;
    }

    wchar_t* emptyText = new wchar_t[1];
    emptyText[0] = 0;
    return emptyText;
}
#else
#error other platform
#endif
} // namespace unicode
thanks
0
Comments
Have a look at: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
We do not have good tools for generating UTF-8 strings; this problem can appear in every country that uses a multibyte language.
Can Ice give us UTF-16 or MBCS support?
Also, Slice has the same problem: strings only support UTF-8... <:-( So I can't write Chinese words in a Slice constant definition.
Of course, this is not urgent, but I hope Ice will give us more powerful support in the future, because I like it. :-)
Regarding string constants: You could use escape sequences to express UTF-8 strings in Slice. I admit that this is not very convenient, but on the other hand, string constants in Slice are used very rarely.
I have this on my todo list.
Cheers,
Michi.