使用google::protobuf進行序列化,在運行時有如下警告!
libprotobuf ERROR google/protobuf/wire_format.cc:1059] Encountered string containing invalid UTF-8 data while serializing protocol buffer. Strings must contain only UTF-8; use the 'bytes' type for raw bytes.
libprotobuf ERROR google/protobuf/wire_format.cc:1059] Encountered string containing invalid UTF-8 data while serializing protocol buffer. Strings must contain only UTF-8; use the 'bytes' type for raw bytes.
原因:protobuf 要求所有 string 類型的字段內容都必須是合法的 UTF-8 編碼(原始字節數據應改用 bytes 類型);非 UTF-8 的字符串可以先使用 <iconv.h> 進行編碼轉換。
介紹一下字符集相關知識:
在編碼技術方面,演化順序爲:ASCII ⇒ GB2312 ⇒ GBK ⇒ GB18030
下面貼一段轉換的代碼:
#include <iconv.h>
#include <stdlib.h>

#include <cerrno>
#include <cstring>
#include <iostream>
#include <string>

using namespace std;
/// @brief Converts a GBK-encoded byte string to UTF-8 using iconv.
///
/// The input is treated as raw bytes of length `instr.size()`, so embedded
/// NUL bytes are converted rather than terminating the string.
///
/// @param instr  GBK-encoded input. It is not modified unless it is the same
///               object as `outstr`.
/// @param outstr Receives the UTF-8 result on success. It may be the same
///               object as `instr` (in-place conversion). It is left
///               untouched when the conversion fails.
/// @return true on success; false if the converter cannot be opened or the
///         input contains an invalid or truncated GBK sequence.
bool convertGbk2Utf(std::string& instr, std::string& outstr)
{
    const iconv_t descriptor = iconv_open("UTF-8", "GBK");
    // iconv_open reports failure with the sentinel (iconv_t)-1.
    if (descriptor == (iconv_t)-1)
    {
        return false;
    }

    // iconv takes a non-const input pointer but only reads through it.
    char* in = const_cast<char*>(instr.data());
    size_t inLeft = instr.size();

    // Accumulate into a local string so that instr and outstr may alias.
    std::string converted;
    converted.reserve(inLeft + inLeft / 2);

    bool ok = true;
    char chunk[256];
    while (inLeft > 0)
    {
        char* out = chunk;
        size_t outLeft = sizeof(chunk);
        const size_t rc = iconv(descriptor, &in, &inLeft, &out, &outLeft);
        // Capture errno before any other call has a chance to overwrite it.
        const int err = (rc == static_cast<size_t>(-1)) ? errno : 0;

        converted.append(chunk, sizeof(chunk) - outLeft);

        // E2BIG only means the chunk filled up: loop again with a fresh one.
        // Anything else (EILSEQ, EINVAL, ...) is a genuine conversion error.
        if (err != 0 && err != E2BIG)
        {
            ok = false;
            break;
        }
    }

    // Always release the descriptor, on both success and failure paths.
    iconv_close(descriptor);

    if (!ok)
    {
        return false;
    }
    outstr.swap(converted);
    return true;
}
/// Demo entry point: converts a sample string from GBK to UTF-8 in place and
/// prints the result.
///
/// @return EXIT_SUCCESS when the conversion succeeds, EXIT_FAILURE otherwise.
int main()
{
    // NOTE(review): the bytes of this literal depend on the encoding this
    // source file is saved in; the conversion below treats them as GBK.
    std::string str = "黃";

    // Do not print unconverted bytes as if the conversion had succeeded.
    if (!convertGbk2Utf(str, str))
    {
        std::cerr << "GBK to UTF-8 conversion failed" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << str << std::endl;
    return EXIT_SUCCESS;
}