C 語言對 UTF-8 字符串的截取

UTF-8 編碼的漢字一般佔用 3 字節,下面默認漢字全是常規的 3 字節漢字。

/*
 * Byte length of the UTF-8 sequence introduced by `lead`, taken from the
 * lead byte's high bits: 0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3,
 * 11110xxx -> 4. A stray continuation byte or an invalid lead byte is
 * treated as a single byte so the scan always makes progress.
 */
static size_t utf8SeqLen(unsigned char lead)
{
    if (lead < 0x80)
        return 1;
    if ((lead & 0xE0) == 0xC0)
        return 2;
    if ((lead & 0xF0) == 0xE0)
        return 3;
    if ((lead & 0xF8) == 0xF0)
        return 4;
    return 1;
}

/*
 * Copy at most `l` bytes of the UTF-8 string `s` into `store` without
 * cutting a multi-byte character in half, then NUL-terminate `store`.
 *
 * s     - NUL-terminated UTF-8 input.
 * store - destination buffer; must have room for at least l + 1 bytes.
 * l     - maximum number of bytes (not characters) to copy.
 *
 * A character that would not fit entirely within the limit is dropped
 * together with everything after it. If `s` or `store` is NULL, or `l` is
 * not positive, the function returns without touching `store`. An empty
 * `s` yields an empty string in `store`.
 */
void splitUtf8(const char *s, char* store, int l)
{
    if (s == NULL || store == NULL || l <= 0)
    {
        return;
    }

    /* Measure the input once; the scan never goes past this limit. */
    size_t total = strlen(s);
    size_t limit = total < (size_t)l ? total : (size_t)l;

    size_t used = 0;
    while (used < limit)
    {
        size_t n = utf8SeqLen((unsigned char)s[used]);
        if (n > limit - used)
        {
            /* The next character straddles the limit: stop before it. */
            break;
        }
        used += n;
    }

    memcpy(store, s, used);
    store[used] = '\0';
}

/*
 * Demo driver: truncates a sample string with splitUtf8, prints the result,
 * then appends ASCII text to show the buffer is still a valid C string.
 */
int main()
{
    // Sample input; "一二三四五六sss" is a longer alternative to try.
    const std::string text = "一二三";

    // Zero-filled destination, comfortably larger than the 12-byte limit.
    char out[64] = {0};

    splitUtf8(text.c_str(), out, 12);
    printf("desc =%s]\n", out);

    // Appending after the call relies on splitUtf8 having NUL-terminated `out`.
    strcat(out, "123");
    printf("desc =%s]\n", out);
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章