You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
package startext
|
|
|
|
|
|
|
|
|
|
import "golang.org/x/text/encoding/simplifiedchinese"
|
|
|
|
|
|
|
|
|
|
func IsUtf8(data []byte) bool {
|
|
|
|
|
return isUtf8(data)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func IsGBK(data []byte) bool {
|
|
|
|
|
return (!isUtf8(data)) && isGBK(data)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// isGBK reports whether every byte of data belongs to either a single-byte
// code (0x00..0x7f) or a two-byte code whose first byte is in 0x81..0xfe and
// whose second byte is in 0x40..0xfe excluding 0x7f.
//
// Empty input yields true. Input that ends in the middle of a two-byte code
// yields false.
func isGBK(data []byte) bool {
	for i := 0; i < len(data); {
		lead := data[i]
		if lead <= 0x7f {
			// Codes 0..127 occupy a single byte and are ASCII compatible.
			i++
			continue
		}

		// Anything above 127 must start a two-byte code. Without a second
		// byte the pair is truncated; bail out before indexing past the end.
		if i+1 >= len(data) {
			return false
		}
		trail := data[i+1]
		if lead < 0x81 || lead > 0xfe ||
			trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}
		i += 2
	}
	return true
}
|
|
|
|
|
|
|
|
|
|
// UTF-8 encoding detection.
|
|
|
|
|
|
|
|
|
|
// preNUm returns how many consecutive 1 bits data has, counting from the most
// significant bit down to (but not including) the first 0 bit. The result is
// in the range 0..8.
func preNUm(data byte) int {
	ones := 0
	// Walk a single-bit probe from bit 7 toward bit 0; it becomes zero after
	// the eighth shift, which ends the loop for an all-ones byte.
	for probe := byte(0x80); probe != 0 && data&probe != 0; probe >>= 1 {
		ones++
	}
	return ones
}
|
|
|
|
|
func isUtf8(data []byte) bool {
|
|
|
|
|
i := 0
|
|
|
|
|
for i < len(data) {
|
|
|
|
|
if (data[i] & 0x80) == 0x00 {
|
|
|
|
|
// 0XXX_XXXX
|
|
|
|
|
i++
|
|
|
|
|
continue
|
|
|
|
|
} else if num := preNUm(data[i]); num > 2 {
|
|
|
|
|
// 110X_XXXX 10XX_XXXX
|
|
|
|
|
// 1110_XXXX 10XX_XXXX 10XX_XXXX
|
|
|
|
|
// 1111_0XXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
|
|
|
|
|
// 1111_10XX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
|
|
|
|
|
// 1111_110X 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX 10XX_XXXX
|
|
|
|
|
// preNUm() 返回首个字节的8个bits中首个0bit前面1bit的个数,该数量也是该字符所使用的字节数
|
|
|
|
|
i++
|
|
|
|
|
for j := 0; j < num-1; j++ {
|
|
|
|
|
//判断后面的 num - 1 个字节是不是都是10开头
|
|
|
|
|
if (data[i] & 0xc0) != 0x80 {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
//其他情况说明不是utf-8
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func GBK2UTF8(data []byte) ([]byte,error) {
|
|
|
|
|
return simplifiedchinese.GBK.NewDecoder().Bytes(data)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func UTF82GBK(data []byte) ([]byte,error) {
|
|
|
|
|
return simplifiedchinese.GBK.NewEncoder().Bytes(data)
|
|
|
|
|
}
|