前言
1.C++的string對中文的查找、替換之類的基本操作並不友好。如果要對中文進行操作,要把中文轉成寬字符(wstring)來解決,因爲string中每個字符所佔的字節數是不確定的:在雙字節編碼(如GBK)中每個中文佔2個字節,而字符串中有時還可能有英文、數字等字符,這些只佔1個字節,所以查找返回的是字節偏移,很容易不是目標字符實際所在的字符位置。
2.如果要操作中文字符串,比較好的辦法是先把string轉成wstring,完成查找、匹配等操作之後,再轉回來。
3.這裏我定義了一個類,把string與wstring之間的互相轉換都封裝成成員函數。
代碼
Chinese.h
#pragma once
#include <string>
#include <iostream>
// Helper class that converts between narrow (char-based, multi-byte) strings
// and wide (wchar_t-based) strings so that search/replace can be done per
// character instead of per byte. The class declares no data members.
class Chinese
{
public:
Chinese();
~Chinese();
// Converts a NUL-terminated char* string to wchar_t*.
// Writes into the caller-provided `buff` and returns `buff`. At most one
// wchar_t is written per input byte, plus a terminating 0, so `buff` needs
// room for strlen(str) + 1 wchar_t.
wchar_t* MBCSToUnicode(wchar_t * buff, const char * str);
// Converts a 0-terminated wchar_t* string to char*.
// Writes into the caller-provided `buff` and returns `buff`. One or two bytes
// are written per wide character, plus a terminating 0, so `buff` needs room
// for 2 * wcslen(str) + 1 bytes.
char* unicodeToMBCS(char* buff, const wchar_t* str);
// Converts a std::string to a std::wstring (implemented via MBCSToUnicode).
std::wstring strToWstr(std::string &input);
// Converts a std::wstring to a std::string (implemented via unicodeToMBCS).
std::string wstrToStr(std::wstring &wstr);
// Converts a std::wstring to a char* by forwarding to the
// const wchar_t* overload below.
char* wstrToChar(std::wstring &wstr);
// Converts a 0-terminated wchar_t* string to a char* buffer.
// NOTE(review): check the definition for who owns/frees the returned buffer.
char* wstrToChar(const wchar_t* wstr);
};
Chinese.cpp
#include "Chinese.h"
// Default constructor. Intentionally empty: there is nothing to initialise.
Chinese::Chinese() {}
// Destructor. Intentionally empty: there is nothing to release.
Chinese::~Chinese() {}
// Converts a NUL-terminated multi-byte string into a wide string, producing
// one wchar_t per character.
//
// Encoding model (double-byte, GBK-style):
//   - a byte with the high bit set is the lead byte of a two-byte character;
//     it is packed with the following byte into one wchar_t
//     (lead byte in bits 0-7, trail byte in bits 8-15);
//   - any other byte is a single-byte character and is copied as-is.
//
// NOTE: this is a reversible byte-packing scheme, not a real code-page to
// Unicode conversion; the resulting values are only meaningful to
// unicodeToMBCS and to comparisons against strings converted the same way.
//
// buff  caller-provided output buffer; must hold at least strlen(str) + 1
//       wchar_t (worst case: every input byte is a single-byte character).
// str   NUL-terminated input string.
// Returns buff.
wchar_t* Chinese::MBCSToUnicode(wchar_t* buff, const char* str)
{
    wchar_t* out = buff;
    // Work on unsigned bytes so values >= 0x80 never sign-extend.
    const unsigned char* in = reinterpret_cast<const unsigned char*>(str);

    while (*in)
    {
        // Only consume two bytes when a trail byte actually exists; a lone
        // lead byte at the end of the input must not step over the NUL.
        if ((*in & 0x80) && in[1] != 0)
        {
            // Assemble the value arithmetically instead of reading the bytes
            // through a wchar_t*: that read was unaligned, violated aliasing
            // rules and fetched sizeof(wchar_t) bytes (4 on many platforms).
            *out = static_cast<wchar_t>(in[0] | (in[1] << 8));
            in += 2;
        }
        else
        {
            *out = static_cast<wchar_t>(*in);
            ++in;
        }
        ++out;
    }

    *out = 0x0000;
    return buff;
}
// Converts a 0-terminated wide string back into a multi-byte string.
//
// For every wide character the low byte (bits 0-7) is written first; if any
// of bits 8-15 are set, the second byte (bits 8-15) is written after it.
// Characters whose value fits in one byte therefore produce a single byte.
//
// buff  caller-provided output buffer; must hold at least
//       2 * wcslen(str) + 1 bytes.
// str   0-terminated wide input string.
// Returns buff.
char* Chinese::unicodeToMBCS(char* buff, const wchar_t* str)
{
    char* out = buff;

    for (const wchar_t* in = str; *in; ++in)
    {
        // Extract bytes with shifts/masks rather than by viewing the wchar_t
        // through a char*: the result no longer depends on host byte order,
        // and the input stays const.
        const unsigned long code = static_cast<unsigned long>(*in);

        *out++ = static_cast<char>(code & 0xFF);
        if (code & 0xFF00)
        {
            *out++ = static_cast<char>((code >> 8) & 0xFF);
        }
    }

    *out = 0x00;
    return buff;
}
// Converts a narrow multi-byte std::string into a std::wstring using
// MBCSToUnicode. Conversion stops at the first NUL byte in `input`.
//
// input  narrow string to convert (not modified).
// Returns the converted wide string.
std::wstring Chinese::strToWstr(std::string &input)
{
    // Worst case is one wchar_t per input byte, plus the terminator.
    // A std::wstring owns the scratch space, so nothing leaks if a later
    // allocation throws and there is no unchecked malloc result.
    std::wstring scratch(input.size() + 1, L'\0');
    MBCSToUnicode(&scratch[0], input.c_str());

    // Two-byte characters collapse into one wchar_t, so the real length may
    // be shorter than the scratch buffer: rebuild from the C string to trim
    // at the terminator written by MBCSToUnicode.
    return std::wstring(scratch.c_str());
}
// Convenience overload: hands the wstring's 0-terminated contents to
// wstrToChar(const wchar_t*) and returns that call's result unchanged.
char* Chinese::wstrToChar(std::wstring &wstr)
{
    return wstrToChar(wstr.c_str());
}
// Converts a 0-terminated wide string into a newly allocated, NUL-terminated
// multi-byte string using unicodeToMBCS.
//
// wstr  0-terminated wide input string.
// Returns a buffer obtained from malloc(), or a null pointer if the
// allocation fails. Ownership passes to the caller, who must release the
// buffer with free().
char* Chinese::wstrToChar(const wchar_t* wstr)
{
    // Keep the length as size_t; narrowing it to int could overflow.
    const size_t len = wcslen(wstr);

    // Each wide character expands to at most two bytes, plus the terminator.
    char* buff = (char*)malloc((len * 2 + 1) * sizeof(char));
    if (!buff)
    {
        return buff; // allocation failed: propagate the null pointer
    }

    // The buffer must NOT be freed here: it is the return value. Freeing it
    // before returning handed every caller a dangling pointer.
    return unicodeToMBCS(buff, wstr);
}
// Converts a std::wstring into a narrow multi-byte std::string using
// unicodeToMBCS. Conversion stops at the first 0 wide character in `wstr`.
//
// wstr  wide string to convert (not modified).
// Returns the converted narrow string.
std::string Chinese::wstrToStr(std::wstring &wstr)
{
    // Each wide character expands to at most two bytes, plus the terminator.
    // A std::string owns the scratch space, so nothing leaks if a later
    // allocation throws and there is no unchecked malloc result.
    std::string scratch(2 * wstr.size() + 1, '\0');
    unicodeToMBCS(&scratch[0], wstr.c_str());

    // Single-byte characters use only one of their two reserved bytes, so
    // rebuild from the C string to trim at the terminator.
    return std::string(scratch.c_str());
}
main.cpp
#include <iostream>
#include <string>
#include "Chinese.h"
int main()
{
//輸入層:接收char*輸入,並將其轉換爲wchar*
std::string input = "於老師的k父親王老爺子是蒙古的海軍司令!yes";
std::string temp = "王";
Chinese ch;
std::wstring w_str = ch.strToWstr(input);
std::wstring w_tem = ch.strToWstr(temp);
int index = w_str.find(w_tem);
std::cout << index << std::endl;
return 0;
}