UTF-8 編碼下一個漢字通常佔 3 個字節,GBK 編碼下一個漢字佔 2 個字節。ASCII 碼的範圍是十進制 0~127(十六進制 0x00~0x7F),所以當某個字節的值大於等於 128(0x80)時,代表它屬於多字節字符(例如中文漢字),再根據編碼跳過該字符剩餘的字節數即可。
/**
 * Count the characters (not bytes) in a string without relying on the
 * mbstring extension.
 *
 * Two encodings are understood:
 *  - UTF-8 (`'utf-8'` / `'utf8'`, case-insensitive): every byte that is not a
 *    continuation byte (bit pattern 10xxxxxx) starts a new character, so
 *    1-, 2-, 3- and 4-byte sequences are all counted as one character.
 *  - Anything else is treated as a GBK-style double-byte encoding: a lead
 *    byte in the range 0x81-0xFE consumes the following trail byte as well.
 *
 * Malformed input is not rejected; stray UTF-8 continuation bytes are simply
 * not counted, and a dangling GBK lead byte at the end counts as one character.
 *
 * @param string $str  Raw byte string to measure.
 * @param string $type Encoding name of $str; defaults to 'utf-8'.
 *
 * @return int Number of characters in $str (0 for the empty string).
 */
function mbstrlen($str, $type = 'utf-8')
{
    $len = strlen($str);
    if ($len <= 0) {
        return 0;
    }

    // Accept 'UTF-8', 'utf8', etc. instead of silently falling back to GBK.
    $encoding = strtolower((string) $type);
    $isUtf8 = ($encoding === 'utf-8' || $encoding === 'utf8');

    $count = 0;
    for ($i = 0; $i < $len; $i++) {
        // Square-bracket offsets: the curly-brace form is removed in PHP 8.
        $byte = ord($str[$i]);

        if ($isUtf8) {
            // Continuation bytes (10xxxxxx) belong to the previous character.
            if (($byte & 0xC0) !== 0x80) {
                $count++;
            }
            continue;
        }

        $count++;
        if ($byte >= 0x81 && $byte <= 0xFE) {
            // GBK lead byte: skip its single trail byte (2 bytes in total).
            $i++;
        }
    }

    return $count;
}
// Demo: compare the raw byte length with the character count of the same text.
$str = '程序猿';
echo strlen($str) . "\n";   // strlen() reports bytes, not characters
echo mbstrlen($str) . "\n"; // character count using the default 'utf-8' type

// mb_convert_encoding() takes the target encoding BEFORE the source encoding.
// The literal above is UTF-8 (per the default type used for it), so convert
// UTF-8 -> GBK to hand the 'gbk' path a genuine GBK byte sequence.
$str = mb_convert_encoding($str, 'gbk', 'utf-8');
echo mbstrlen($str, 'gbk') . "\n";
[Running] php "/Users/why/Desktop/php/shmop.php"
9
3
3
[Done] exited with code=0 in 0.288 seconds