whoissdk/charset.go
2026-03-19 11:53:07 +08:00

79 lines
1.5 KiB
Go

package whois
import (
"bytes"
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
)
// decodeWhoisPayload turns a raw WHOIS response into text and reports which
// charset label was chosen for it.
//
// Leading and trailing NUL bytes are removed first. The second return value
// is one of:
//   - "unknown": nothing is left after NUL trimming (text is "").
//   - "utf-8":   the payload, minus an optional UTF-8 BOM, is valid UTF-8.
//   - "gb18030", "gbk", "big5": the fallback decoding that scoreDecodedText
//     ranked highest.
//   - "binary":  every fallback decoder returned an error; the bytes are
//     returned converted to a string as-is.
func decodeWhoisPayload(raw []byte) (string, string) {
	payload := bytes.Trim(raw, "\x00")
	if len(payload) == 0 {
		return "", "unknown"
	}

	// Drop a leading UTF-8 byte order mark so it does not end up in the text.
	payload = bytes.TrimPrefix(payload, []byte{0xEF, 0xBB, 0xBF})
	if utf8.Valid(payload) {
		return string(payload), "utf-8"
	}

	// Fallback charsets, tried in this order. The comparison below is strict,
	// so on equal scores the earlier entry is kept.
	fallbacks := []struct {
		label string
		codec encoding.Encoding
	}{
		{"gb18030", simplifiedchinese.GB18030},
		{"gbk", simplifiedchinese.GBK},
		{"big5", traditionalchinese.Big5},
	}

	// Start from the undecoded bytes with a score lower than any real result,
	// so the first decoder that succeeds always replaces it.
	text, label := string(payload), "binary"
	top := -1 << 30

	for _, fb := range fallbacks {
		out, err := fb.codec.NewDecoder().Bytes(payload)
		if err != nil {
			continue
		}
		attempt := string(out)
		if s := scoreDecodedText(attempt); s > top {
			top, text, label = s, attempt, fb.label
		}
	}
	return text, label
}
// scoreDecodedText rates how much s looks like readable text; a higher
// result means a more plausible decoding.
//
// A string that is empty or whitespace-only scores -10000. Otherwise the
// result is the sum of a per-rune weight:
//
//	U+FFFD (utf8.RuneError)    -80
//	'\n', '\r', '\t'            +2
//	letter or digit             +3
//	any other printable rune    +1
//	everything else             -2
func scoreDecodedText(s string) int {
	if len(strings.TrimSpace(s)) == 0 {
		return -10000
	}

	total := 0
	for _, ch := range s {
		var weight int
		switch ch {
		case utf8.RuneError:
			// Replacement characters signal a failed decode; penalize heavily.
			weight = -80
		case '\n', '\r', '\t':
			// Ordinary layout whitespace is matched before the generic
			// classes so it is rewarded rather than counted as a control rune.
			weight = 2
		default:
			switch {
			case unicode.IsLetter(ch), unicode.IsDigit(ch):
				weight = 3
			case unicode.IsPrint(ch):
				weight = 1
			default:
				weight = -2
			}
		}
		total += weight
	}
	return total
}