首页 > Other > PHP UTF-8 编码检测

PHP UTF-8 编码检测

/**
 * Heuristically decide whether a byte string is UTF-8 encoded.
 *
 * A leading UTF-8 byte order mark (EF BB BF) is accepted immediately.
 * Otherwise every byte is scanned and adjacent byte pairs are scored:
 *   - a continuation byte (10xxxxxx) right after a lead byte (11xxxxxx) is "good";
 *   - a continuation byte right after an ASCII byte is "bad";
 *   - a lead byte that is not followed by a continuation byte (including a
 *     lead byte at the very end of the input) is "bad".
 * Text containing non-ASCII bytes is reported as UTF-8 when good >= bad, so
 * this is a tolerant guess, not a strict validation.
 *
 * Inputs of any length, including the empty string, get an integer result.
 *
 * @author ZhangYan
 * @param string $text Raw bytes to inspect.
 * @return int 0 = not UTF-8, 1 = pure ASCII, 2 = UTF-8 (or starts with a BOM).
 */
function IsUtf8($text) {
    $text = (string) $text;
    $length = strlen($text);

    // A BOM at the very start settles the question without scanning.
    if ($length >= 3 && strncmp($text, "\xEF\xBB\xBF", 3) === 0) {
        return 2;
    }

    $lastch = 0;    // previous byte; 0 (ASCII) before the first byte
    $good = 0;      // lead byte -> continuation byte transitions
    $bad = 0;       // malformed transitions
    $notAscii = 0;  // bytes with the high bit set

    for ($i = 0; $i < $length; $i++) {
        $ch = ord($text[$i]);

        if ($ch >= 0x80) {
            $notAscii++;
        }

        if (($ch & 0xC0) == 0x80) {
            // Continuation byte: fine after a lead byte, wrong after ASCII,
            // neutral after another continuation byte (multi-byte tail).
            if (($lastch & 0xC0) == 0xC0) {
                $good++;
            } else if (($lastch & 0x80) == 0) {
                $bad++;
            }
        } else if (($lastch & 0xC0) == 0xC0) {
            // The previous lead byte was not followed by a continuation byte.
            $bad++;
        }

        $lastch = $ch;
    }

    // A lead byte at the end of the input has no continuation byte either.
    if (($lastch & 0xC0) == 0xC0) {
        $bad++;
    }

    if ($notAscii == 0) {
        return 1;
    }

    return ($good >= $bad) ? 2 : 0;
}
/**
 * Convert a string of unknown encoding to UTF-8.
 *
 * The input is returned unchanged when it already is well-formed UTF-8 text;
 * otherwise it is treated as GBK and transcoded with iconv().
 *
 * Created 2008-12-19 by Zhang Yan.
 *
 * @param string $string Text that is either UTF-8 or GBK/GB2312 encoded.
 * @return string|false The UTF-8 text, or false when iconv() cannot convert
 *                      the input.
 */
function iconv_utf8($string) {
    // Byte-level description of well-formed UTF-8 text. The pattern is used
    // without the /u flag on purpose, so it matches raw bytes.
    $is_utf8 = preg_match('%^(?:
          [\x09\x0A\x0D\x20-\x7E]            # ASCII
        | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
        | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
        | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
        | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
        | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
        | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
    )*$%xs', $string);

    if ($is_utf8 === false) {
        // preg_match() reports an engine error (e.g. a backtrack or stack
        // limit on long input) as false. Do not mistake that for "not UTF-8":
        // let PCRE validate the subject itself via an empty /u pattern, which
        // yields 1 for valid UTF-8 and false for invalid UTF-8. Unlike the
        // pattern above, this fallback also accepts ASCII control characters.
        $is_utf8 = preg_match('//u', $string);
    }

    // 1 means UTF-8; anything else is handled as GBK, GB2312 or similar.
    if ($is_utf8 === 1) {
        return $string;
    }

    return iconv('GBK', 'UTF-8', $string);
}

分类:Other
  1. 还没有评论。
  1. No trackbacks yet.

发表评论

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / 更改 )

Twitter picture

You are commenting using your Twitter account. Log Out / 更改 )

Facebook photo

You are commenting using your Facebook account. Log Out / 更改 )

Google+ photo

You are commenting using your Google+ account. Log Out / 更改 )

Connecting to %s

%d 博主赞过: