UTF8到Unicode
来源:百度文库 编辑:神马文学网 时间:2024/04/30 04:06:30
这段代码不仅包含UTF8到Unicode的转换,还包括Unicode到GB2312以及UTF8到GB2312的转换。如果不使用Windows API,就应该自己建立一个转换表,这样效率才会好;因为我一时没有找到这种表,如果谁有请贴上来。谢谢。
#include <windows.h>   // WideCharToMultiByte, CopyMemory, BOOL, TCHAR, LPCWSTR
#include <iostream>    // NOTE(review): header name was lost in extraction; <iostream> assumed - confirm
using namespace std;
// Returns the number of leading 1 bits in `ch`, which for a UTF-8 lead byte
// is the length in bytes of the sequence it introduces (2 for 110xxxxx,
// 3 for 1110xxxx, 4 for 11110xxx).
//
// A byte with no leading 1 bit (plain ASCII) yields 1. A byte with exactly
// one leading 1 bit (10xxxxxx, a continuation byte) also yields 1. Bytes that
// are not valid UTF-8 lead bytes are not rejected: 0xFF, for example,
// yields 8.
int GetNo(char ch)
{
    // Count on an unsigned copy: plain `char` may be signed, and left-shifting
    // a negative value is undefined behaviour before C++20.
    unsigned char bits = static_cast<unsigned char>(ch);
    int nLeadingOnes = 0;
    while (bits & 0x80)
    {
        ++nLeadingOnes;
        bits = static_cast<unsigned char>(bits << 1);
    }
    return nLeadingOnes == 0 ? 1 : nLeadingOnes;
}
// Decodes ONE UTF-8 sequence of `nBytes` bytes starting at `pUtf8` into a
// single 16-bit code unit, written to `pUnicode` low byte first, and then
// appends a two-byte zero terminator.
//
//   pUtf8    - first byte of the sequence; bytes [0, nBytes) are read for
//              nBytes == 1, 2 or 3. Continuation bytes are not validated.
//   pUnicode - output buffer; must have room for at least 4 bytes.
//   nBytes   - length of the sequence. Only 1, 2 and 3 are decoded; for any
//              other value nothing is read and only the two-byte terminator
//              is written (an empty string).
void UTF8ToUnicode(const char *pUtf8, char *pUnicode, int nBytes)
{
    unsigned int codeUnit = 0;
    bool decoded = true;

    switch (nBytes)
    {
    case 1:
        // The byte is copied through unchanged as the low byte.
        codeUnit = static_cast<unsigned char>(pUtf8[0]);
        break;
    case 2:
        // 110xxxxx 10yyyyyy -> 00000xxx xxyyyyyy
        codeUnit = (static_cast<unsigned int>(pUtf8[0] & 0x1f) << 6)
                 |  static_cast<unsigned int>(pUtf8[1] & 0x3f);
        break;
    case 3:
        // 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyy yyzzzzzz
        codeUnit = (static_cast<unsigned int>(pUtf8[0] & 0x0f) << 12)
                 | (static_cast<unsigned int>(pUtf8[1] & 0x3f) << 6)
                 |  static_cast<unsigned int>(pUtf8[2] & 0x3f);
        break;
    default:
        decoded = false;
        break;
    }

    if (decoded)
    {
        // Low byte first, then high byte.
        *pUnicode++ = static_cast<char>(codeUnit & 0xff);
        *pUnicode++ = static_cast<char>((codeUnit >> 8) & 0xff);
    }

    // A 16-bit string needs a 16-bit terminator, so write both zero bytes.
    pUnicode[0] = '\0';
    pUnicode[1] = '\0';
}
int UnicodeToGB2312(char *pUnicode, char *pGb2312, int nBytes)
{
int nMultiBytes =::WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, (LPCWSTR)(pUnicode), -1,
pGb2312, 0, NULL, NULL);
if (nMultiBytes >= nBytes)
return -1;
int nError = ::WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, (LPCWSTR)(pUnicode), -1,
pGb2312, nBytes, NULL, NULL);
if (nError == 0)
return -1;
return nMultiBytes - 1;
}
// Converts the NUL-terminated UTF-8 string `pUtf8` to GB2312, one character
// at a time, writing the result to `pGb2312`.
//
//   pUtf8   - NUL-terminated UTF-8 input.
//   pGb2312 - output buffer. No size is passed, so the caller must provide
//             enough room for the converted bytes plus a terminating NUL.
//
// Returns TRUE when the whole input was converted. Returns FALSE as soon as a
// character cannot be converted: a sequence longer than 3 bytes, a truncated
// or malformed multi-byte sequence, or a failure reported by UnicodeToGB2312.
// In every case the bytes written so far are NUL-terminated.
BOOL UTF8ToGb2312(const char *pUtf8, char *pGb2312)
{
    while (*pUtf8 != '\0')
    {
        const int nBytes = GetNo(*pUtf8);

        // UTF8ToUnicode only decodes sequences of up to 3 bytes.
        if (nBytes > 3)
        {
            *pGb2312 = '\0';
            return FALSE;
        }

        // Every byte after the lead byte must look like 10xxxxxx. This also
        // catches a string that ends in the middle of a sequence, so that
        // `pUtf8 += nBytes` below can never step past the terminator.
        for (int i = 1; i < nBytes; ++i)
        {
            if ((static_cast<unsigned char>(pUtf8[i]) & 0xC0) != 0x80)
            {
                *pGb2312 = '\0';
                return FALSE;
            }
        }

        // A WCHAR array gives a correctly aligned wide buffer regardless of
        // whether the build defines UNICODE: one code unit, its terminator,
        // and one spare element.
        WCHAR wszUnicode[3] = {0};
        UTF8ToUnicode(pUtf8, reinterpret_cast<char *>(wszUnicode), nBytes);

        char szGb2312[5] = {0};
        const int nSize = UnicodeToGB2312(reinterpret_cast<char *>(wszUnicode), szGb2312,
                                          static_cast<int>(sizeof(szGb2312) - sizeof(szGb2312[0])));
        if (nSize == -1 || nSize == 0)
        {
            *pGb2312 = '\0';
            return FALSE;
        }

        CopyMemory(pGb2312, szGb2312, nSize);
        pUtf8 += nBytes;
        pGb2312 += nSize;
    }

    // Terminate the output so callers can treat it as a C string.
    *pGb2312 = '\0';
    return TRUE;
}
#include
#include
using namespace std;
// Returns the number of leading 1 bits in `ch`, which for a UTF-8 lead byte
// is the length in bytes of the sequence it introduces (2 for 110xxxxx,
// 3 for 1110xxxx, 4 for 11110xxx).
//
// A byte with no leading 1 bit (plain ASCII) yields 1. A byte with exactly
// one leading 1 bit (10xxxxxx, a continuation byte) also yields 1. Bytes that
// are not valid UTF-8 lead bytes are not rejected: 0xFF, for example,
// yields 8.
int GetNo(char ch)
{
    // Count on an unsigned copy: plain `char` may be signed, and left-shifting
    // a negative value is undefined behaviour before C++20.
    unsigned char bits = static_cast<unsigned char>(ch);
    int nLeadingOnes = 0;
    while (bits & 0x80)
    {
        ++nLeadingOnes;
        bits = static_cast<unsigned char>(bits << 1);
    }
    return nLeadingOnes == 0 ? 1 : nLeadingOnes;
}
// Decodes ONE UTF-8 sequence of `nBytes` bytes starting at `pUtf8` into a
// single 16-bit code unit, written to `pUnicode` low byte first, and then
// appends a two-byte zero terminator.
//
//   pUtf8    - first byte of the sequence; bytes [0, nBytes) are read for
//              nBytes == 1, 2 or 3. Continuation bytes are not validated.
//   pUnicode - output buffer; must have room for at least 4 bytes.
//   nBytes   - length of the sequence. Only 1, 2 and 3 are decoded; for any
//              other value nothing is read and only the two-byte terminator
//              is written (an empty string).
void UTF8ToUnicode(const char *pUtf8, char *pUnicode, int nBytes)
{
    unsigned int codeUnit = 0;
    bool decoded = true;

    switch (nBytes)
    {
    case 1:
        // The byte is copied through unchanged as the low byte.
        codeUnit = static_cast<unsigned char>(pUtf8[0]);
        break;
    case 2:
        // 110xxxxx 10yyyyyy -> 00000xxx xxyyyyyy
        codeUnit = (static_cast<unsigned int>(pUtf8[0] & 0x1f) << 6)
                 |  static_cast<unsigned int>(pUtf8[1] & 0x3f);
        break;
    case 3:
        // 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyy yyzzzzzz
        codeUnit = (static_cast<unsigned int>(pUtf8[0] & 0x0f) << 12)
                 | (static_cast<unsigned int>(pUtf8[1] & 0x3f) << 6)
                 |  static_cast<unsigned int>(pUtf8[2] & 0x3f);
        break;
    default:
        decoded = false;
        break;
    }

    if (decoded)
    {
        // Low byte first, then high byte.
        *pUnicode++ = static_cast<char>(codeUnit & 0xff);
        *pUnicode++ = static_cast<char>((codeUnit >> 8) & 0xff);
    }

    // A 16-bit string needs a 16-bit terminator, so write both zero bytes.
    pUnicode[0] = '\0';
    pUnicode[1] = '\0';
}
int UnicodeToGB2312(char *pUnicode, char *pGb2312, int nBytes)
{
int nMultiBytes =::WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, (LPCWSTR)(pUnicode), -1,
pGb2312, 0, NULL, NULL);
if (nMultiBytes >= nBytes)
return -1;
int nError = ::WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, (LPCWSTR)(pUnicode), -1,
pGb2312, nBytes, NULL, NULL);
if (nError == 0)
return -1;
return nMultiBytes - 1;
}
// Converts the NUL-terminated UTF-8 string `pUtf8` to GB2312, one character
// at a time, writing the result to `pGb2312`.
//
//   pUtf8   - NUL-terminated UTF-8 input.
//   pGb2312 - output buffer. No size is passed, so the caller must provide
//             enough room for the converted bytes plus a terminating NUL.
//
// Returns TRUE when the whole input was converted. Returns FALSE as soon as a
// character cannot be converted: a sequence longer than 3 bytes, a truncated
// or malformed multi-byte sequence, or a failure reported by UnicodeToGB2312.
// In every case the bytes written so far are NUL-terminated.
BOOL UTF8ToGb2312(const char *pUtf8, char *pGb2312)
{
    while (*pUtf8 != '\0')
    {
        const int nBytes = GetNo(*pUtf8);

        // UTF8ToUnicode only decodes sequences of up to 3 bytes.
        if (nBytes > 3)
        {
            *pGb2312 = '\0';
            return FALSE;
        }

        // Every byte after the lead byte must look like 10xxxxxx. This also
        // catches a string that ends in the middle of a sequence, so that
        // `pUtf8 += nBytes` below can never step past the terminator.
        for (int i = 1; i < nBytes; ++i)
        {
            if ((static_cast<unsigned char>(pUtf8[i]) & 0xC0) != 0x80)
            {
                *pGb2312 = '\0';
                return FALSE;
            }
        }

        // A WCHAR array gives a correctly aligned wide buffer regardless of
        // whether the build defines UNICODE: one code unit, its terminator,
        // and one spare element.
        WCHAR wszUnicode[3] = {0};
        UTF8ToUnicode(pUtf8, reinterpret_cast<char *>(wszUnicode), nBytes);

        char szGb2312[5] = {0};
        const int nSize = UnicodeToGB2312(reinterpret_cast<char *>(wszUnicode), szGb2312,
                                          static_cast<int>(sizeof(szGb2312) - sizeof(szGb2312[0])));
        if (nSize == -1 || nSize == 0)
        {
            *pGb2312 = '\0';
            return FALSE;
        }

        CopyMemory(pGb2312, szGb2312, nSize);
        pUtf8 += nBytes;
        pGb2312 += nSize;
    }

    // Terminate the output so callers can treat it as a C string.
    *pGb2312 = '\0';
    return TRUE;
}
UTF8到Unicode
iso10646/unicode/utf8
对字符编码与Unicode,ISO 10646,UCS,UTF8,UTF16,GBK,GB...
文件批量改名工具官网–新起飞部落 ? lazarus UTF8 unicode
对字符编码与Unicode,ISO 10646,UCS,UTF8,UTF16,GBK,GB...
unicode
utf8的编码算法
unicode编码
unicode 汉字编码
Unicode编码
java, unicode and xml
什么是Unicode(统一码)?
Unicode字符编码规范
UNICODE环境设置
什么是Unicode(统一码)?
Unicode详解又一篇
Linux Unicode 编程
VC++的Unicode编程
UNICODE环境设置
Unicode字符编码规范
对UTF8编码的初步认识
escape utf8字符串的php实现
修改mysql字符编码成为UTF8
修改mysql字符编码成为UTF8