iconv是一個以Unicode作爲中間碼、實現各種內碼間相互轉換的庫,它基本上囊括了世界上所有的編碼方式,例如ASCII、GB2312、GBK、GB18030、BIG5、UTF-8、UCS-2、UCS-2BE、UCS-2LE、UCS-4、UCS-4BE、UCS-4LE、UTF-16、UTF-16BE、UTF-16LE、UTF-32、UTF-32BE、UTF-32LE、UTF-7等等,除此之外,還包括泰語、日語、韓語、西歐等國家語言的編碼。下面我們演示如何使用iconv實現Big5到GB2312的轉換;只要簡單修改一下iconv_open()的兩個參數,便可實現iconv所支持的任何編碼間的轉換。
下載
libiconv是linux版本的iconv,可在 http://www.gnu.org/software/libiconv/ 下載
iconv的win32版本可以在 http://gnuwin32.sourceforge.net/packages/libiconv.htm 下載
SVN源碼
另外,還有一些演示代碼,需要的可以到我的SVN下載
http://xcyber.googlecode.com/svn/trunk/Convert/
演示代碼
- /****************************************************************************
- * Big5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file
- * File:
- * Big5ToGb2312.c
- * Description:
- * Convert Big5 encoding file to GB2312 encoding file using iconv library
- * Author:
- * XCyber email:[email protected]
- * Date:
- * August 7, 2008
- * Other:
- * visit http://www.gnu.org/software/libiconv/ for more help of iconv
- ***************************************************************************/
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tchar.h>
#include "../iconv-1.9.2.win32/include/iconv.h"
- //#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv.lib") // using iconv dynamic-link lib, iconv.dll
- #pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv_a.lib") // using iconv static lib
- #define BUFFER_SIZE 1024 //BUFFER_SIZE must >= 2
/*
 * Print the program banner and the command-line usage to standard output.
 * Takes no arguments and returns nothing.
 */
void usage(void)
{
    /* "\n" escapes restored: the original text had them garbled into "/n",
       which printed a literal slash-n instead of starting a new line. */
    printf("\nBig5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file\n");
    printf("[email protected] on August 7, 2008\n");
    printf(" Usage:\n");
    printf(" Big5ToGB2312 [Big5 file(in)] [GB2312 file(out)]\n\n");
}
- int main(int argc, char* argv[])
- {
- FILE * pSrcFile = NULL;
- FILE * pDstFile = NULL;
- char szSrcBuf[BUFFER_SIZE];
- char szDstBuf[BUFFER_SIZE];
- size_t nSrc = 0;
- size_t nDst = 0;
- size_t nRead = 0;
- size_t nRet = 0;
- char *pSrcBuf = szSrcBuf;
- char *pDstBuf = szDstBuf;
- iconv_t icv;
- int argument = 1;
- //check input arguments
- if(argc != 3)
- {
- usage();
- return -1;
- }
- pSrcFile = fopen(argv[1],"r");
- if(pSrcFile == NULL)
- {
- printf("can't open source file!/n");
- return -1;
- }
- pDstFile = fopen(argv[2],"w");
- if(pSrcFile == NULL)
- {
- printf("can't open destination file!/n");
- return -1;
- }
- //initialize iconv routine, perform conversion from BIG5 to GB2312
- //TODO: if you want to perfom other type of coversion, e.g. GB2312->BIG5, GB2312->UTF-8 ...
- //just change following two paremeters of iconv_open()
- icv = iconv_open("GB2312","BIG5");
- if(icv == 0)
- {
- printf("can't initalize iconv routine!/n");
- return -1;
- }
- //enable "illegal sequence discard and continue" feature, so that if met illeagal sequence,
- //conversion will continue instead of being terminated
- if(iconvctl (icv ,ICONV_SET_DISCARD_ILSEQ,&argument) != 0)
- {
- printf("can't enable /"illegal sequence discard and continue/" feature!/n");
- return -1;
- }
- while(!feof(pSrcFile))
- {
- pSrcBuf = szSrcBuf;
- pDstBuf = szDstBuf;
- nDst = BUFFER_SIZE;
- // read data from source file
- nRead = fread(szSrcBuf + nSrc,sizeof(char),BUFFER_SIZE - nSrc,pSrcFile);
- if(nRead == 0)
- break;
- // the amount of data to be converted should include previous left data and current read data
- nSrc = nSrc + nRead;
- //perform conversion
- nRet = iconv(icv,(const char**)&pSrcBuf,&nSrc,&pDstBuf,&nDst);
- if(nRet == -1)
- {
- // include all case of errno: E2BIG, EILSEQ, EINVAL
- // E2BIG: There is not sufficient room at *outbuf.
- // EILSEQ: An invalid multibyte sequence has been encountered in the input.
- // EINVAL: An incomplete multibyte sequence has been encountered in the input
- // move the left data to the head of szSrcBuf in other to link it with the next data block
- memmove(szSrcBuf,pSrcBuf,nSrc);
- }
- //wirte data to destination file
- fwrite(szDstBuf,sizeof(char),BUFFER_SIZE - nDst,pDstFile);
- }
- iconv_close(icv);
- fclose(pSrcFile);
- fclose(pDstFile);
- printf("conversion complete./n");
- return 0;
- }