VS2010複製中文亂碼修復

  當複製VS2010中帶有中文字符的代碼到Office(Word, Excel, PowerPoint, Outlook)時,
在中文字符後面會被添加一到三個亂碼。比如複製"中文字符",粘貼到word就會變成(中D文?字Á?符¤?)。

 

    通過黑暗執行緒的一篇文章提示,截取剪貼板中RTF(富文本格式)的數據流,並作出修復。該作者使用了 .NET中的正則表達式類RegEx來達到修復目的。方法調用很簡單,感興趣的可以搜索一下作者的博客空間。

 

      我這裏給出另一個解決方法:同時截取CF_UNICODETEXT和RTF兩種剪貼板數據,以CF_UNICODETEXT中的正確數據爲參照,對比剔除RTF中的亂碼。

 

      寫這份代碼走了不少彎路,主要是一開始對RTF格式不熟悉,其中的中文接ASCII碼處理着實讓我差點抓破了頭皮。

 

先給出亂碼修復截圖:

 

 

VS2010CopyModify 

程序添加了全局熱鍵 CTRL+Q 和 監測狀態。右下角則是一個動態文字Logo,鼠標移上去就會出現Logo,

移開則改爲快捷鍵說明。編譯好的文件可在這裏下載:VS2010CopyModify.rar。

 

 

下面貼出程序的核心算法:

 

函數參數說明:

PTSTR        pText     (RTF數據首地址指針,切勿直接傳遞剪貼板內存,必須是程序從剪貼板拷貝出來的副本。)

UINT          iText      (RTF數據大小。單位字節)

LPWSTR    pTextU   (CF_UNICODETEXT數據首地址指針,同上,不可爲剪貼板內存。)

UINT          iTextU    (CF_UNICODETEXT數據大小。單位字符)

 

返回值類型  UINT      (修復完後RTF數據的大小。單位字節)


 

/*-------------------------------
  Repair.cpp - 實作亂碼修正處理
  -------------------------------*/

#include <windows.h>
#include <string.h>

/* Sizes used by the repair routine. */
enum {
    REPAIR_MARKER_LEN  = 10, /* length of the marker \uinput2\u               */
    REPAIR_MAX_RUN     = 4,  /* ASCII characters sampled after a code point    */
    REPAIR_PATTERN_CAP = 11, /* room for 4 escaped characters, a '\' and NUL  */
    REPAIR_MAX_PREFIX  = 8   /* longest pattern prefix tried when backtracking */
};

/* Returns nonzero when p points at a backslash followed by 'c' or 'p' and
   then 'f' or 'a'. This is the deliberately loose test used to recognise the
   start of a control word such as \cf0 or \par. Comparisons short-circuit, so
   nothing past a terminating NUL is read. */
static int RepairIsControlLead(const char *p)
{
    return p[0] == '\\' &&
           (p[1] == 'c' || p[1] == 'p') &&
           (p[2] == 'f' || p[2] == 'a');
}

/* Deletes the bytes in [dst, src) by moving everything from src up to and
   including offset `total` down to dst, and returns the reduced size.
   Returns `total` unchanged when there is nothing to delete or when src lies
   beyond `total` (size and buffer contents disagree). */
static UINT RepairCloseGap(char *base, UINT total, char *dst, char *src)
{
    size_t srcOff = (size_t)(src - base);

    if (src <= dst || srcOff > total)
        return total;
    /* The regions overlap, so memmove rather than memcpy. */
    memmove(dst, src, (size_t)total - srcOff + 1);
    return total - (UINT)(src - dst);
}

/* Finds where the genuine ASCII text resumes in the RTF stream.
   cur      first byte after the blanked filler byte; cur[-1] is inside the
            buffer (it is the space written by the caller).
   uni      Unicode text positioned just after the code point being repaired.
   uniLeft  number of characters available at uni.
   Returns the position to keep from, or NULL when nothing suitable is found
   (the caller then deletes nothing). */
static char *RepairLocateAscii(char *cur, LPWSTR uni, UINT uniLeft)
{
    char   pattern[REPAIR_PATTERN_CAP] = {0};
    size_t len = 0;
    size_t n;
    UINT   taken;
    char  *p;
    char  *ctrl = NULL;
    char  *where;
    char  *best = NULL;
    int    split = 0;

    /* Build the RTF spelling of up to four following ASCII characters;
       backslash and braces are written with a leading backslash in RTF. */
    for (taken = 0;
         taken < REPAIR_MAX_RUN && taken < uniLeft &&
         uni[taken] >= 0x20 && uni[taken] <= 0x80;
         taken++)
    {
        unsigned int ch = uni[taken];

        if (ch == 0x5C || ch == 0x7B || ch == 0x7D)
            pattern[len++] = '\\';
        pattern[len++] = (char)ch;
    }
    /* A short pattern is expected to be followed directly by a control word,
       so append its leading backslash. The append goes at the end of the
       escaped text (the original indexed by the unescaped character count
       and overwrote an escaped character). */
    if (len <= 3)
        pattern[len++] = '\\';

    /* Easy case: the text appears contiguously. */
    p = strstr(cur, pattern);
    if (p != NULL)
        return p;

    /* Otherwise the run is interrupted by a control word. Find the first
       unescaped backslash that starts something shaped like \cf0 / \par. */
    for (p = cur; *p != '\0'; p++)
    {
        if (p[0] == '\\' && p[-1] != '\\' &&
            (p[1] == 'c' || p[1] == 'p') &&
            (p[2] == 'f' || p[2] == 'a') &&
            (p[3] == '0' || p[3] == 'r'))
        {
            ctrl = p;
            break;
        }
    }
    if (ctrl == NULL)
        return NULL;

    /* Walk backwards from that control word, never past cur, looking for the
       start of the text. Each round requires a longer prefix of the pattern
       to match and accepts a position when the next byte also matches or a
       control word follows the prefix (the run was split there). */
    where = ctrl;
    for (n = 1;
         n <= REPAIR_MAX_PREFIX && pattern[n - 1] != '\0' && !split;
         n++)
    {
        int found = 0;

        for (; where >= cur; where--)
        {
            if (strncmp(where, pattern, n) != 0)
                continue;
            if (pattern[n - 1] == '\\')
            {
                /* Prefix ends in an escape lead: the escaped byte must match too. */
                if (where[n] == pattern[n])
                {
                    found = 1;
                    break;
                }
            }
            else if (pattern[n] != '\0' && where[n] == pattern[n])
            {
                found = 1;
                break;
            }
            else if (RepairIsControlLead(where + n))
            {
                split = 1;
                found = 1;
                break;
            }
        }
        if (!found)
            break;          /* keep the last position that did qualify */
        best = where;
    }
    return best;
}

/* Removes the stray bytes that follow each "\uinput2\u<N>" code point in an
   RTF buffer, using the matching Unicode text to decide where the real
   content resumes.

   pText   RTF data, NUL-terminated and writable (a private copy, not
           clipboard memory). It is processed as 8-bit characters.
   iText   Size of the RTF data in bytes. The byte at offset iText is moved
           together with the tail, so the buffer must hold iText + 1 bytes.
   pTextU  The same text as UTF-16; NUL-terminated.
   iTextU  Number of characters available at pTextU.

   Returns the size of the RTF data after the repair. When an expected
   landmark cannot be found, that code point is left untouched rather than
   reading or writing outside the buffer. */
UINT ClobalRepair (PTSTR pText, UINT iText, LPWSTR pTextU, UINT iTextU)
{
    static const char kMarker[]    = "\\uinput2\\u";
    static const char kLineEnd[]   = "\\cf0\\par";
    static const char kParagraph[] = "\\par";

    char        *rtf;
    char        *cur;
    char        *numEnd;
    char        *resume;
    LPWSTR       uni;
    UINT         iU;
    long         code;
    unsigned int next;

    if (pText == NULL || pTextU == NULL)
        return iText;

    rtf = (char *)pText;
    cur = rtf;
    uni = pTextU;

    for (;;)
    {
        /* Locate the next \uinput2\u marker. */
        cur = strstr(cur, kMarker);
        if (cur == NULL)
            break;
        cur += REPAIR_MARKER_LEN;

        /* The code point is written in decimal and may be negative;
           masking yields the 16-bit value. */
        code = strtol(cur, &numEnd, 10) & 0xFFFF;
        if (numEnd == cur)
            continue;       /* no number after the marker: not ours to fix */
        if (numEnd[0] == '\0' || numEnd[1] == '\0')
            break;          /* truncated data: nothing left to repair */

        /* Skip the byte after the number, blank the next one, and start
           deleting from the byte after that. */
        numEnd[1] = ' ';
        cur = numEnd + 2;

        /* Advance the Unicode cursor to just past this code point. */
        for (iU = 0; iU < iTextU; iU++)
        {
            if ((long)uni[iU] == code)
            {
                uni    += iU + 1;
                iTextU -= iU + 1;
                break;
            }
        }
        next = (iTextU > 0) ? uni[0] : 0;

        if (next > 0x80)
            resume = strstr(cur, kMarker);      /* another code point follows */
        else if (next == 0)
            resume = strstr(cur, kParagraph);   /* end of the text            */
        else if (next == 0x0D || next == 0x09)
            resume = strstr(cur, kLineEnd);     /* carriage return or tab     */
        else if (next >= 0x20)
            resume = RepairLocateAscii(cur, uni, iTextU);
        else
            continue;                           /* other control characters   */

        if (resume == NULL)
            continue;

        iText = RepairCloseGap(rtf, iText, cur, resume);
    }
    return iText;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章