文字:
Z 饕餮🇨🇳❤️☎️ 😀 圞壜奱
(這裏第4個“CN”是一個國旗的字符,這裏顯示不出來,在手機上輸入國旗可以顯示,見下圖)
\u005a Z ->utf8: 5A
\u0020 空格 ->utf8: 20
\u9955 饕 ->utf8: E9 A5 95
\u992e 餮 ->utf8: E9 A4 AE
\ud83c\udde8\ud83c\uddf3 紅旗🇨🇳 ->utf8: F0 9F 87 A8 F0 9F 87 B3
\u2764\ufe0f 心形❤️ ->utf8: E2 9D A4 EF B8 8F
\u260e\ufe0f ☎️ ->utf8: E2 98 8E EF B8 8F
\ud83d\ude00 😀 ->utf8: F0 9F 98 80
\u0020 空格 ->utf8: 20
\u571e 圞 ->utf8: E5 9C 9E
\u58dc 壜 ->utf8: E5 A3 9C
\u5971 奱 ->utf8: E5 A5 B1
utf16(小端字節序 Little Endian,按字節):
5a 00 20 00 55 99 2e 99 3c d8 e8 dd 3c d8 f3 dd 64 27 0f fe 0e 26 0f fe 3d d8 00 de 20 00 1e 57 dc 58 71 59
utf16(按16位碼元 code unit):
\u005a \u0020 \u9955 \u992e \ud83c \udde8 \ud83c \uddf3 \u2764 \ufe0f \u260e \ufe0f \ud83d \ude00 \u0020 \u571e \u58dc \u5971
utf-8:
5a 20 e9 a5 95 e9 a4 ae F0 9F 87 A8 F0 9F 87 B3 e2 9d a4 ef b8 8f e2 98 8e ef b8 8f f0 9f 98 80 20 e5 9c 9e e5 a3 9c e5 a5 b1
代碼實現:
/*------------------------------------------------------------------------------*/
/*Name : StrUnicodeToUtf8 */
/*Role : Converts a string of type unicode(UCS-2) to a string of type utf8 */
/*Interface : */
/* - IN : UnicBytes:unicode string(UCS-2) Little Endian */
/* - IN : UnicChLen:unicode char len */
/* - IN/OUT : Utf8Buf:Utf8 string buf */
/* - IN : BufSize:Utf8 string buf size */
/* - OUT : OutBytesSize:Output Utf8 string bytes len */
/* - return : result of convert, 0:convert success, 1:param error, 2:out of buf*/
/*Pre-condition : Unicode(UCS-2) Little Endian */
/*------------------------------------------------------------------------------*/
int StrUnicodeToUtf8(ubyte *UnicBytes, int UnicChLen, ubyte *Utf8Buf, int BufSize, int *OutBytesSize)
{
int ChCnt = 0;
ulong UnicCh = 0;
ulong UnicChUTF16Low = 0;
ubyte Utf8Ch[6] = {0};
ubyte *pIndex = Utf8Buf;
int BytesCnt = 0;
int Utf8ChBytesLen = 0;
int ret = 0;
if(NULL==UnicBytes || NULL==Utf8Buf || NULL==OutBytesSize)
{
ret = 1;
}
else
{
for(ChCnt=0; ChCnt<UnicChLen; ChCnt++)
{
UnicCh = 0;
UnicCh = UnicBytes[ChCnt*2+1];
UnicCh <<= 8;
UnicCh |= UnicBytes[ChCnt*2];
/*UTF-16*/
/*
* High-half zone of UTF-16 = ((emoji code)-0x10000)/0x400+0xD800
* Low-half zone of UTF-16 =((emoji code)-0x10000)%0x400+0xDC00
**/
if(UnicCh>=0xD800 && UnicCh<=0xDBFF)
{
if(ChCnt+1<UnicChLen)
{
UnicChUTF16Low = UnicBytes[ChCnt*2+3];
UnicChUTF16Low <<= 8;
UnicChUTF16Low |= UnicBytes[ChCnt*2+2];
if(UnicChUTF16Low>=0xDC00 && UnicChUTF16Low<=0xDFFF)
{
UnicCh = (UnicCh-0xD800)*0x400;
UnicCh += (UnicChUTF16Low-0xDC00+0x10000);
ChCnt++;
}
}
}
/*Convert*/
Utf8ChBytesLen = UnicodeToUtf8(UnicCh, Utf8Ch, sizeof(Utf8Ch));
if(BytesCnt+Utf8ChBytesLen <= BufSize)
{
memcpy(pIndex, Utf8Ch, Utf8ChBytesLen);
pIndex += Utf8ChBytesLen;
BytesCnt += Utf8ChBytesLen;
}
else
{
ret = 2;
break;
}
}/*end for*/
*OutBytesSize = BytesCnt;
*pIndex = 0; /* add end signal of string */
}
return ret;
}
/*----------------------------------------------------------------------------*/
/*Name          : UnicodeToUtf8                                               */
/*Role          : Encodes one code value (UCS-2 or UCS-4) as a UTF-8 byte     */
/*                sequence of 1 to 6 bytes                                    */
/*Interface     :                                                             */
/*  - IN        : unic: code value to encode                                  */
/*  - OUT       : pOutput: receives the UTF-8 bytes; no NUL is appended       */
/*  - IN        : outSize: size of pOutput; must be >= 6 whatever the actual  */
/*                sequence length turns out to be                             */
/*  - return    : number of bytes stored in pOutput (1..6); 0 when pOutput is */
/*                NULL, outSize < 6, or unic is above 0x7FFFFFFF              */
/*Pre-condition :                                                             */
/*----------------------------------------------------------------------------*/
int UnicodeToUtf8(ulong unic, ubyte *pOutput, int outSize)
{
    int seqLen;   /* total bytes in the encoded sequence               */
    int leadTag;  /* length marker bits placed in the first byte       */
    int leadMask; /* payload bits that the first byte can still carry  */
    int pos;

    /* A usable 6-byte buffer is required up front (longest possible form). */
    if( (NULL == pOutput) || (outSize < 6) )
    {
        return 0;
    }

    /* U-00000000 - U-0000007F: 0xxxxxxx */
    if( unic <= 0x0000007F )
    {
        pOutput[0] = (unic & 0x7F);
        return 1;
    }

    /* Pick the sequence length and first-byte layout for the value range. */
    if( unic <= 0x000007FF )
    {
        /* U-00000080 - U-000007FF: 110xxxxx 10xxxxxx */
        seqLen = 2; leadTag = 0xC0; leadMask = 0x1F;
    }
    else if( unic <= 0x0000FFFF )
    {
        /* U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx */
        seqLen = 3; leadTag = 0xE0; leadMask = 0x0F;
    }
    else if( unic <= 0x001FFFFF )
    {
        /* U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        seqLen = 4; leadTag = 0xF0; leadMask = 0x07;
    }
    else if( unic <= 0x03FFFFFF )
    {
        /* U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
        seqLen = 5; leadTag = 0xF8; leadMask = 0x03;
    }
    else if( unic <= 0x7FFFFFFF )
    {
        /* U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
        seqLen = 6; leadTag = 0xFC; leadMask = 0x01;
    }
    else
    {
        /* Value has more than 31 significant bits: not encodable. */
        return 0;
    }

    /* First byte: length marker plus the most significant payload bits. */
    pOutput[0] = ((unic >> (6 * (seqLen - 1))) & leadMask) | leadTag;

    /* Remaining bytes: 10xxxxxx, six payload bits each, high bits first. */
    for( pos = 1; pos < seqLen; pos++ )
    {
        pOutput[pos] = ((unic >> (6 * (seqLen - 1 - pos))) & 0x3F) | 0x80;
    }

    return seqLen;
}
編碼轉換
http://tool.chinaz.com/tools/unicode.aspx
utf-8轉換
https://www.qqxiuzi.cn/bianma/Unicode-UTF.php
emoji表情大全
http://bbs.52svip.cn/emoji/
Unicode編碼表
https://www.cnblogs.com/csguo/p/7401874.html
變量選擇器
http://www.seotest.cn/jishu/34607.html