PHP 中文字符串截取方法汇总
1、可通过指定编码进行字符串截取。
/**
 * Extract a substring of a multi-byte (e.g. Chinese) string, counting in
 * characters rather than bytes. Supports utf-8, gb2312, gbk and big5.
 *
 * When the mbstring extension is loaded the work is delegated to mb_substr().
 * Otherwise the string is split into characters with a byte-level regular
 * expression for the requested charset and the wanted slice is re-joined.
 *
 * NOTE: the mb_substr() path returns the bare slice and does not use $suffix;
 * only the regex fallback appends the suffix (and it does so whenever $suffix
 * is true, whether or not anything was cut off). This difference is kept
 * as-is so that existing callers see unchanged output.
 *
 * @param string   $str     String to cut.
 * @param int      $start   Index of the first character to keep (0-based).
 * @param int|null $length  Number of characters to keep; null keeps everything
 *                          from $start to the end of the string.
 * @param string   $charset utf-8|gb2312|gbk|big5 (case-insensitive in the
 *                          regex fallback).
 * @param bool     $suffix  Whether the regex fallback appends "…" to the result.
 *
 * @return string The extracted substring.
 *
 * @throws InvalidArgumentException If the regex fallback is used with a
 *                                  charset it has no pattern for.
 */
function CsubStrPro($str, $start = 0, $length = null, $charset = "utf-8", $suffix = true)
{
    // Preferred path: let mbstring do the character-aware slicing.
    if (function_exists("mb_substr")) {
        return mb_substr($str, $start, $length, $charset);
    }

    // One pattern per charset; each alternative matches exactly one character.
    // The ranges are written as \xHH escapes (interpreted by PCRE) so that the
    // patterns match raw bytes no matter how this source file is encoded.
    $re = array(
        'utf-8'  => '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/',
        'gb2312' => '/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/',
        'gbk'    => '/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/',
        // The second trail-byte range needs its own [...] class.
        'big5'   => '/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/',
    );

    $key = strtolower($charset);
    if (!isset($re[$key])) {
        throw new InvalidArgumentException("Unsupported charset: " . $charset);
    }

    // $match[0] holds the string split into individual characters.
    preg_match_all($re[$key], $str, $match);
    $slice = join("", array_slice($match[0], $start, $length));

    if ($suffix) {
        return $slice . "…";
    }

    return $slice;
}
2、按显示宽度截取字符串(原文称可自动识别 GBK 和 UTF-8 编码;注意:下面的实现只按 UTF-8 首字节规则判断字符长度,GBK 编码的字符串并不能被正确截取):
/**
 * Truncate a UTF-8 string to a maximum display width, appending "..." when
 * part of the string had to be left out at the end.
 *
 * Width is counted as follows: every multi-byte character and every ASCII
 * upper-case letter (A-Z) counts as 1; any other single byte (lower-case
 * letters, digits, half-width punctuation, ...) counts as 0.5.
 *
 * Character boundaries are derived from the UTF-8 lead byte only
 * (>= 240: 4 bytes, >= 224: 3 bytes, >= 192: 2 bytes, otherwise 1 byte);
 * the input is not validated.
 *
 * @param string         $sourcestr String to truncate.
 * @param int            $cutstart  Number of leading characters to skip before
 *                                  cutting starts (each character counts as 1
 *                                  here, regardless of its width). Values <= 0
 *                                  skip nothing.
 * @param int|float|null $cutlength Maximum width to keep; null means no limit.
 *
 * @return string The truncated string, followed by "..." if bytes remain
 *                after the kept part.
 */
function cutStr($sourcestr, $cutstart = 0, $cutlength = null)
{
    $sourcestr = (string) $sourcestr;
    $str_length = strlen($sourcestr); // length in bytes
    $returnstr = '';
    $i = 0;       // current byte offset
    $n = 0;       // accumulated display width of the kept part
    $skipped = 0; // characters skipped so far to honor $cutstart

    // Strictly "<": there is no character to read at offset $str_length.
    while ($i < $str_length) {
        $ascnum = ord($sourcestr[$i]);

        // Byte length of the current character, taken from its lead byte.
        if ($ascnum >= 240) {
            $bytes = 4; // supplementary-plane characters such as emoji
        } elseif ($ascnum >= 224) {
            $bytes = 3;
        } elseif ($ascnum >= 192) {
            $bytes = 2;
        } else {
            $bytes = 1;
        }

        // Still inside the prefix the caller asked to skip.
        if ($skipped < $cutstart) {
            $i += $bytes;
            $skipped++;
            continue;
        }

        // Width budget used up: stop before taking another character.
        if ($cutlength !== null && $n >= $cutlength) {
            break;
        }

        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;

        // Multi-byte characters and capitals count as a full-width unit,
        // everything else as half a unit.
        if ($bytes > 1 || ($ascnum >= 65 && $ascnum <= 90)) {
            $n += 1;
        } else {
            $n += 0.5;
        }
    }

    // Unconsumed bytes remain, so the text was cut: mark it with an ellipsis.
    if ($i < $str_length) {
        $returnstr .= "...";
    }

    return $returnstr;
}
来源: http://www.taocms.org/99.html