关于utf8转码问题
要和服务器端通信,需要把中文(std::wstring)转成utf8,用的是SysWideToMultiByte(wide, CP_UTF8);但看到有些网页上的中文转utf8转换器,转出来的结果完全不一样,比如这些:
中文:
中国互联网络信息中心.cn
http://www.tool.la/UTF-8/
http://tool.chinaz.com/Tools/UTF-8.aspx
中国互联网络信息中心.cn
我转出来的结果和windows notepad一致:
0x E4 B8 AD E5 9B BD E4 BA 92 E8 81 94 E7 BD 91 E7 BB 9C E4 BF A1 E6 81 AF E4 B8 AD E5 BF 83 (2E 63 6E .cn)
求助ing~~~~~~
[解决办法]
WideCharToMultiByte(CP_UTF8, ……) 要从UNICODE转到UTF8
如果是ANSI字符的话,还得转成UNICODE字符, MultiByteToWideChar(CP_ACP, ……)
[解决办法]
\xe4\xb8\xad\xe5\x9b\xbd\xe4\xba\x92\xe8\x81\x94\xe7\xbd\x91\xe7\xbb\x9c\xe4\xbf\xa1\xe6\x81\xaf\xe4\xb8\xad\xe5\xbf\x83.cn
[解决办法]
以前为了使用 SQLite, 编写的一个转换类.
- C/C++ code
class CTranslateUTF8{public: CTranslateUTF8(void); ~CTranslateUTF8(void); static BOOL AsciiToUnicode(LPWSTR strDes,int &iDesSize,LPCSTR strSrc); static BOOL UnicodeToAscii(LPSTR strDes,int &iDesSize,LPCWSTR strSrc); static BOOL UnicodeToUtf8(LPSTR strDes,int &iDesSize,LPCWSTR strSrc); static BOOL Utf8ToUnicode(LPWSTR strDes,int &iDesSize,LPCSTR strSrc); static BOOL AsciiToUtf8(LPSTR strDes,int &iDesSize,LPCSTR strSrc); static BOOL Utf8ToAscii(LPSTR strDes,int &iDesSize,LPCSTR strSrc); static BOOL Utf8ToTstr(LPTSTR strDes,int &iDesSize,LPCSTR strSrc); static BOOL TstrToUtf8(LPSTR strDes,int &iDesSize,LPCTSTR strSrc); static BOOL TstrToUnicode(LPWSTR strDes,int &iDesSize,LPCTSTR strSrc); static BOOL TstrToAscii(LPSTR strDes,int &iDesSize,LPCTSTR strSrc); static BOOL AsciiToTstr(LPTSTR strDes,int &iDesSize,LPCSTR strSrc);};#include "StdAfx.h"#include "TranslateUTF8.h"CTranslateUTF8::CTranslateUTF8(void){}CTranslateUTF8::~CTranslateUTF8(void){}BOOL CTranslateUTF8::UnicodeToAscii(LPSTR strDes,int &iDesSize,LPCWSTR strSrc){ char* pElementText=NULL; int iTextLen=0; // wide char to multi char iTextLen = WideCharToMultiByte( CP_ACP, 0, strSrc, -1, NULL, 0, NULL, NULL ); if(iDesSize<iTextLen+2) { iDesSize=iTextLen+2; return FALSE; } pElementText = new char[iTextLen + 1]; memset( ( void* )pElementText, 0, sizeof( char ) * ( iTextLen + 1 ) ); ::WideCharToMultiByte( CP_ACP, 0, strSrc, -1, pElementText, iTextLen, NULL, NULL ); memcpy(strDes,pElementText,iTextLen); delete[] pElementText; return TRUE;}BOOL CTranslateUTF8::AsciiToUnicode(LPWSTR strDes,int &iDesSize,LPCSTR strSrc){ //int len = 0; //len = strlen(strSrc); int unicodeLen = ::MultiByteToWideChar( CP_ACP, 0, strSrc, -1, NULL, 0 ); if(iDesSize<unicodeLen+2) { iDesSize=unicodeLen+2; return FALSE; } wchar_t * pUnicode=NULL; pUnicode = new wchar_t[unicodeLen+1]; memset(pUnicode,0,(unicodeLen+1)*sizeof(wchar_t)); ::MultiByteToWideChar( CP_ACP, 0, strSrc, -1, (LPWSTR)pUnicode, unicodeLen ); 
memcpy(strDes,pUnicode,unicodeLen*2); delete [] pUnicode; return TRUE; }BOOL CTranslateUTF8::UnicodeToUtf8(LPSTR strDes,int &iDesSize,LPCWSTR strSrc){ char* pElementText=NULL; int iTextLen=0; // wide char to multi char iTextLen = WideCharToMultiByte( CP_UTF8, 0, strSrc, -1, NULL, 0, NULL, NULL ); if(iDesSize<iTextLen+2) { iDesSize=iTextLen+2; return FALSE; } pElementText = new char[iTextLen + 1]; memset( ( void* )pElementText, 0, sizeof( char ) * ( iTextLen + 1 ) ); ::WideCharToMultiByte( CP_UTF8, 0, strSrc, -1, pElementText, iTextLen, NULL, NULL ); memcpy(strDes,pElementText,iTextLen); delete[] pElementText; return TRUE;}BOOL CTranslateUTF8::Utf8ToUnicode(LPWSTR strDes,int &iDesSize,LPCSTR strSrc){ //int len = 0; //len = strlen(strSrc); int unicodeLen = ::MultiByteToWideChar( CP_UTF8, 0, strSrc, -1, NULL, 0 ); if(iDesSize<unicodeLen+2) { iDesSize=unicodeLen+2; } wchar_t * pUnicode; pUnicode = new wchar_t[unicodeLen+1]; memset(pUnicode,0,(unicodeLen+1)*sizeof(wchar_t)); ::MultiByteToWideChar( CP_UTF8, 0, strSrc, -1, (LPWSTR)pUnicode, unicodeLen ); memcpy(strDes,pUnicode,unicodeLen*2); delete [] pUnicode; return TRUE;}BOOL CTranslateUTF8::AsciiToUtf8(LPSTR strDes,int &iDesSize,LPCSTR strSrc){ WCHAR *buf=new WCHAR[iDesSize]; memset(buf,0,iDesSize*2); int len=iDesSize; if(AsciiToUnicode(buf,len,strSrc)) { if(UnicodeToUtf8(strDes,iDesSize,buf)) { delete [] buf; return TRUE; } } delete [] buf; return FALSE;}BOOL CTranslateUTF8::Utf8ToAscii(LPSTR strDes,int &iDesSize,LPCSTR strSrc){ WCHAR *buf=new WCHAR[iDesSize]; memset(buf,0,iDesSize*2); int len=iDesSize; if(Utf8ToUnicode(buf,len,strSrc)) { if(UnicodeToAscii(strDes,iDesSize,buf)) { delete [] buf; return TRUE; } } delete [] buf; return FALSE;}BOOL CTranslateUTF8::Utf8ToTstr(LPTSTR strDes,int &iDesSize,LPCSTR strSrc){#ifdef _UNICODE return Utf8ToUnicode(strDes,iDesSize,strSrc);#else return Utf8ToAscii(strDes,iDesSize,strSrc);#endif}BOOL CTranslateUTF8::TstrToUtf8(LPSTR strDes,int &iDesSize,LPCTSTR strSrc){#ifdef _UNICODE 
return UnicodeToUtf8(strDes,iDesSize,strSrc);#else return AsciiToUtf8(strDes,iDesSize,strSrc);#endif}BOOL CTranslateUTF8::TstrToAscii(LPSTR strDes,int &iDesSize,LPCTSTR strSrc){#ifdef _UNICODE return UnicodeToAscii(strDes,iDesSize,strSrc);#else memcpy(strDes,strSrc,strlen(strSrc)); return TRUE;#endif return TRUE;}BOOL CTranslateUTF8::TstrToUnicode(LPWSTR strDes,int &iDesSize,LPCTSTR strSrc){#ifdef _UNICODE memcpy(strDes,strSrc,wcslen(strSrc)*2); return TRUE;#else return AsciiToUnicode(strDes,iDesSize,strSrc);#endif}BOOL CTranslateUTF8::AsciiToTstr(LPTSTR strDes,int &iDesSize,LPCSTR strSrc){#ifdef _UNICODE return AsciiToUnicode(strDes,iDesSize,strSrc);#else strcpy_s(strDes,iDesSize,strSrc); return TRUE;#endif}