php和js中，utf-8編碼轉成base64編碼

1、php下轉化base64編碼

php中，文本文件的編碼決定了程序變量的編碼，比如以下代碼在不同編碼的php文件中，展示的效果也是不一樣的

<?php
    // Base64-encode the raw bytes of the literal below. The bytes (and therefore
    // the output) depend on the encoding this source file is saved in.
    $text = '嚴';
    $encoded = base64_encode($text);
    echo $encoded;
?>

如果文本文件是utf-8，則輸出結果爲：5Zq0（"嚴"的utf-8字節是E5 9A B4）；在gbk文件中，同一個字的字節序列不同，輸出結果也就不同。常被引用的5Lil（utf-8）和0c8=（gbk）是簡體字"严"（U+4E25）的結果。

注意：php中轉換成base64編碼時，和網頁編碼沒有關係。

2、js下轉化base64編碼

首先，假定網頁的編碼是utf-8，我們希望對於同樣的字符串，用PHP和Javascript可以得到同樣的Base64編碼。

這裏就會產生一個問題。因爲Javascript內部的字符串，都以utf-16的形式進行保存，因此編碼的時候，我們首先必須將utf-16的字符串轉成utf-8的字節序列，再做Base64編碼；解碼的時候，則是Base64解碼後還需要將utf-8的字節序列轉回成utf-16的字符串。

<html>
<head>
</head>
<body>
<script>
/* utf.js - UTF-8 <=> UTF-16 conversion
*
* Copyright (C) 1999 Masanao Izumo <[email protected]>
* Version: 1.0
* LastModified: Dec 25 1999
* This library is free. You can redistribute it and/or modify it.
*/
/*
* Interfaces:
* utf8 = utf16to8(utf16);
* utf16 = utf8to16(utf8);
*/
/**
 * Encode a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "byte string": every character of the returned string has a
 * code in the range 0x00-0xFF and stands for one UTF-8 byte, which is the form
 * base64encode() expects.
 *
 * Valid surrogate pairs are combined into one code point and written as a
 * 4-byte sequence (standard UTF-8), and U+0000 is written as the single byte
 * 0x00. An unpaired surrogate is still written as a 3-byte sequence.
 *
 * @param {string} str UTF-16 text.
 * @returns {string} UTF-8 bytes, one byte per character.
 */
function utf16to8(str) {
    var out = "";
    var len = str.length;
    var i, c, low;
    for (i = 0; i < len; i++) {
        c = str.charCodeAt(i);
        // A high surrogate followed by a low surrogate is one code point
        // above U+FFFF; merge the pair and skip the second code unit.
        if (c >= 0xD800 && c <= 0xDBFF && i + 1 < len) {
            low = str.charCodeAt(i + 1);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
                i++;
            }
        }
        if (c <= 0x007F) {
            // 0xxxxxxx
            out += String.fromCharCode(c);
        } else if (c <= 0x07FF) {
            // 110x xxxx 10xx xxxx
            out += String.fromCharCode(
                0xC0 | ((c >> 6) & 0x1F),
                0x80 | (c & 0x3F));
        } else if (c <= 0xFFFF) {
            // 1110 xxxx 10xx xxxx 10xx xxxx
            out += String.fromCharCode(
                0xE0 | ((c >> 12) & 0x0F),
                0x80 | ((c >> 6) & 0x3F),
                0x80 | (c & 0x3F));
        } else {
            // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
            out += String.fromCharCode(
                0xF0 | ((c >> 18) & 0x07),
                0x80 | ((c >> 12) & 0x3F),
                0x80 | ((c >> 6) & 0x3F),
                0x80 | (c & 0x3F));
        }
    }
    return out;
}

/**
 * Decode a UTF-8 byte string (one byte per character) into a JavaScript
 * (UTF-16) string.
 *
 * Handles 1- to 4-byte sequences; 4-byte sequences become surrogate pairs.
 * Continuation bytes are not validated. Bytes that cannot start a sequence
 * (0x80-0xBF, 0xF8-0xFF), characters above 0xFF, and 4-byte sequences that do
 * not decode to U+10000..U+10FFFF produce no output.
 *
 * @param {string} str UTF-8 bytes, one byte per character.
 * @returns {string} Decoded UTF-16 text.
 */
function utf8to16(str) {
    var out = "";
    var len = str.length;
    var i = 0;
    var c, char2, char3, char4, cp;
    while (i < len) {
        c = str.charCodeAt(i++);
        // The top four bits of the lead byte select the sequence length.
        switch (c >> 4) {
        case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
            // 0xxxxxxx
            out += str.charAt(i - 1);
            break;
        case 12: case 13:
            // 110x xxxx 10xx xxxx
            char2 = str.charCodeAt(i++);
            out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
            break;
        case 14:
            // 1110 xxxx 10xx xxxx 10xx xxxx
            char2 = str.charCodeAt(i++);
            char3 = str.charCodeAt(i++);
            out += String.fromCharCode(((c & 0x0F) << 12) |
                ((char2 & 0x3F) << 6) |
                (char3 & 0x3F));
            break;
        case 15:
            // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
            if (c > 0xF7) {
                // 0xF8-0xFF never start a UTF-8 sequence; skip the byte.
                break;
            }
            char2 = str.charCodeAt(i++);
            char3 = str.charCodeAt(i++);
            char4 = str.charCodeAt(i++);
            cp = ((c & 0x07) << 18) |
                ((char2 & 0x3F) << 12) |
                ((char3 & 0x3F) << 6) |
                (char4 & 0x3F);
            if (cp >= 0x10000 && cp <= 0x10FFFF) {
                // Split the code point into a UTF-16 surrogate pair.
                cp -= 0x10000;
                out += String.fromCharCode(0xD800 | (cp >> 10), 0xDC00 | (cp & 0x3FF));
            }
            break;
        }
    }
    return out;
}
/* Copyright (C) 1999 Masanao Izumo <[email protected]>
* Version: 1.0
* LastModified: Dec 25 1999
* This library is free. You can redistribute it and/or modify it.
*/
/*
* Interfaces:
* b64 = base64encode(data);
* data = base64decode(b64);
*/

// The 64-character Base64 alphabet; a character's position is its 6-bit value.
var base64EncodeChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Reverse lookup for character codes 0-127: the 6-bit value of each alphabet
// character, and -1 for every other code (including '=').
var base64DecodeChars = (function () {
    var table = [];
    var code, position;
    for (code = 0; code < 128; code++) {
        table.push(-1);
    }
    for (position = 0; position < base64EncodeChars.length; position++) {
        table[base64EncodeChars.charCodeAt(position)] = position;
    }
    return table;
})();

/**
 * Base64-encode a byte string.
 *
 * Each character of the input is treated as one byte; only the low 8 bits of
 * its character code take part in the encoding. Output is padded with '='
 * to a multiple of four characters.
 *
 * @param {string} str Bytes to encode, one byte per character.
 * @returns {string} Base64 text.
 */
function base64encode(str) {
    var total = str.length;
    var pieces = [];
    var pos, remaining, b0, b1, b2;
    for (pos = 0; pos < total; pos += 3) {
        remaining = total - pos;
        b0 = str.charCodeAt(pos) & 0xff;
        pieces.push(base64EncodeChars.charAt(b0 >> 2));
        if (remaining === 1) {
            // One trailing byte: two symbols plus two pad characters.
            pieces.push(base64EncodeChars.charAt((b0 & 0x3) << 4), "==");
            break;
        }
        b1 = str.charCodeAt(pos + 1);
        pieces.push(base64EncodeChars.charAt(((b0 & 0x3) << 4) | ((b1 & 0xF0) >> 4)));
        if (remaining === 2) {
            // Two trailing bytes: three symbols plus one pad character.
            pieces.push(base64EncodeChars.charAt((b1 & 0xF) << 2), "=");
            break;
        }
        b2 = str.charCodeAt(pos + 2);
        pieces.push(base64EncodeChars.charAt(((b1 & 0xF) << 2) | ((b2 & 0xC0) >> 6)));
        pieces.push(base64EncodeChars.charAt(b2 & 0x3F));
    }
    return pieces.join("");
}

/**
 * Decode Base64 text into a byte string (one byte per character).
 *
 * Characters outside the Base64 alphabet (whitespace, line breaks, any
 * non-ASCII character) are skipped. Decoding stops at the first '=' found in
 * the third or fourth position of a group, or when the input runs out.
 *
 * Character codes are looked up unmasked: a code with no entry in
 * base64DecodeChars is treated as "not a Base64 character" rather than being
 * folded onto another byte or decoded as the value 0.
 *
 * @param {string} str Base64 text.
 * @returns {string} Decoded bytes, one byte per character.
 */
function base64decode(str) {
    var PAD = 61; // character code of '='
    var len = str.length;
    var i = 0;
    var out = "";
    var c1, c2, c3, c4;

    // Return the next 6-bit value from the input, skipping characters that are
    // not in the alphabet. Returns -1 when the input is exhausted, or -2 when
    // stopAtPad is true and a '=' is reached.
    function next(stopAtPad) {
        var code, value;
        while (i < len) {
            code = str.charCodeAt(i++);
            if (stopAtPad && code === PAD) {
                return -2;
            }
            value = base64DecodeChars[code];
            if (typeof value === "number" && value !== -1) {
                return value;
            }
        }
        return -1;
    }

    while (i < len) {
        c1 = next(false);
        if (c1 === -1) {
            break;
        }
        c2 = next(false);
        if (c2 === -1) {
            break;
        }
        out += String.fromCharCode((c1 << 2) | ((c2 & 0x30) >> 4));

        c3 = next(true);
        if (c3 < 0) {
            break;
        }
        out += String.fromCharCode(((c2 & 0xF) << 4) | ((c3 & 0x3C) >> 2));

        c4 = next(true);
        if (c4 < 0) {
            break;
        }
        out += String.fromCharCode(((c3 & 0x03) << 6) | c4);
    }
    return out;
}
// Demo: turn the UTF-16 string into UTF-8 bytes, Base64-encode those bytes,
// and print the result to the console.
var str = '嚴';
var sEncoded = base64encode(
    utf16to8(str)
);
console.log(sEncoded);
</script>
</body>
</html>

文件編碼無論是utf-8或者gbk編碼，輸出的都是5Zq0（即"嚴"的utf-8字節E5 9A B4的Base64值）。注意文件編碼和網頁編碼需要統一。

3、base64編碼原理

所謂Base64，就是說選出64個字符----小寫字母a-z、大寫字母A-Z、數字0-9、符號"+"、"/"（再加上作爲墊字的"="，實際上是65個字符）----作爲一個基本字符集。然後，其他所有符號都轉換成這個字符集中的字符。它是用64個可打印字符表示所有二進制數據的方法。這64個字符的對應表如下：索引0-25對應A-Z，26-51對應a-z，52-61對應0-9，62對應"+"，63對應"/"。

具體來說，轉換方式可以分爲四步。

第一步，將每三個字節作爲一組，一共是24個二進制位。

第二步，將這24個二進制位分爲四組，每個組有6個二進制位。

第三步，在每組前面加兩個0，擴展成32個二進制位，即四個字節。

第四步，根據上表，得到擴展後的每個字節的對應符號，這就是Base64的編碼值。

因爲，Base64將三個字節轉化成四個字節，因此Base64編碼後的文本，會比原文本大出三分之一左右。

舉一個具體的實例，演示英語單詞Man如何轉成Base64編碼。

Text content	M								a								n
ASCII	77								97								110
Bit pattern	0	1	0	0	1	1	0	1	0	1	1	0	0	0	0	1	0	1	1	0	1	1	1	0
Index	19						22						5						46
Base64-Encoded	T						W						F						u

第一步，"M"、"a"、"n"的ASCII值分別是77、97、110，對應的二進制值是01001101、01100001、01101110，將它們連成一個24位的二進制字符串010011010110000101101110。

第二步，將這個24位的二進制字符串分成4組，每組6個二進制位：010011、010110、000101、101110。

第三步，在每組前面加兩個0，擴展成32個二進制位，即四個字節：00010011、00010110、00000101、00101110。它們的十進制值分別是19、22、5、46。

第四步，根據上表，得到每個值對應Base64編碼，即T、W、F、u。

因此，Man的Base64編碼就是TWFu。

如果字節數不足三，則這樣處理：

a）二個字節的情況：將這二個字節的一共16個二進制位，按照上面的規則，轉成三組，最後一組除了前面加兩個0以外，後面也要加兩個0。這樣得到一個三位的Base64編碼，再在末尾補上一個"="號。

比如，"Ma"這個字符串是兩個字節，可以轉化成三組00010011、00010110、00000100以後，對應的十進制值分別是19、22、4，對應Base64值分別爲T、W、E，再補上一個"="號，因此"Ma"的Base64編碼就是TWE=

b）一個字節的情況：將這一個字節的8個二進制位，按照上面的規則轉成二組，最後一組除了前面加二個0以外，後面再加4個0。這樣得到一個二位的Base64編碼，再在末尾補上兩個"="號。

比如，"M"這個字母是一個字節，可以轉化爲二組00010011、00010000，對應的Base64值分別爲T、Q，再補上二個"="號，因此"M"的Base64編碼就是TQ==

再舉一個中文的例子，漢字"嚴"如何轉化成Base64編碼？

這裏需要注意，漢字本身可以有多種編碼，比如gb2312、utf-8、gbk等等，每一種編碼的Base64對應值都不一樣。下面的例子以utf-8爲例。

首先，"嚴"（U+56B4）的utf-8編碼爲E59AB4，寫成二進制就是三字節的"11100101 10011010 10110100"。將這個24位的二進制字符串，按照第3節中的規則，轉換成四組一共32位的二進制值"00111001 00011001 00101010 00110100"，相應的十進制數爲57、25、42、52，它們對應的Base64值就爲5、Z、q、0。

所以，漢字"嚴"（utf-8編碼）的Base64值就是5Zq0。（簡體字"严"的utf-8編碼是E4B8A5，對應的Base64值是5Lil。）

源碼地址：http://git.oschina.net/kjr/kangjianrong_public/tree/master/base64?dir=1&filepath=base64&oid=cf9a5a3d26e59e9312ecb91788a895f5b1880703&sha=700c885e658ffda59bfa7aa8ba2de07fe240d246

參考：http://www.ruanyifeng.com/blog/2008/06/base64.html

php和js中，utf-8編碼轉成base64編碼

釘釘打卡速度慢

Nginx R31 doc 官方文檔-01-nginx 如何安裝

Qt/C++音視頻開發74-合併標籤圖形/生成yolo運算結果圖形/文字和圖形合併成一個/水印濾鏡

挑戰程序設計競賽 2.2章習題 POJ - 3617 Best Cow Line 貪心

字節面試：MySQL什麼時候鎖表？如何防止鎖表？

.NET8連接SQL SERVER 2008 R2 報：證書鏈是由不受信任的頒發機構頒發的

golang開發環境搭建(win10)

python計算機視覺學習筆記——PIL庫的用法

Golang初學：獲取程序內存使用情況，std runtime

下載並安裝LNMP一鍵安裝包

ajax 跨域提示重新載入頁面以獲取源代碼

單例模式最簡單

php中的和的區別

php 打印小票模板

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結