79 lines
1.5 KiB
Go
79 lines
1.5 KiB
Go
package whois
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/encoding"
|
|
"golang.org/x/text/encoding/simplifiedchinese"
|
|
"golang.org/x/text/encoding/traditionalchinese"
|
|
)
|
|
|
|
func decodeWhoisPayload(raw []byte) (string, string) {
|
|
trimmed := bytes.Trim(raw, "\x00")
|
|
if len(trimmed) == 0 {
|
|
return "", "unknown"
|
|
}
|
|
|
|
if bytes.HasPrefix(trimmed, []byte{0xEF, 0xBB, 0xBF}) {
|
|
trimmed = trimmed[3:]
|
|
}
|
|
if utf8.Valid(trimmed) {
|
|
return string(trimmed), "utf-8"
|
|
}
|
|
|
|
type candidate struct {
|
|
name string
|
|
enc encoding.Encoding
|
|
}
|
|
candidates := []candidate{
|
|
{name: "gb18030", enc: simplifiedchinese.GB18030},
|
|
{name: "gbk", enc: simplifiedchinese.GBK},
|
|
{name: "big5", enc: traditionalchinese.Big5},
|
|
}
|
|
|
|
bestText := string(trimmed)
|
|
bestName := "binary"
|
|
bestScore := -1 << 30
|
|
|
|
for _, c := range candidates {
|
|
decoded, err := c.enc.NewDecoder().Bytes(trimmed)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
text := string(decoded)
|
|
score := scoreDecodedText(text)
|
|
if score > bestScore {
|
|
bestScore = score
|
|
bestText = text
|
|
bestName = c.name
|
|
}
|
|
}
|
|
return bestText, bestName
|
|
}
|
|
|
|
// scoreDecodedText rates how plausible s is as readable text; a higher
// result means a more plausible decode.
//
// A string that is empty or consists only of whitespace scores -10000.
// Otherwise every rune contributes to the total, using the first rule that
// matches:
//
//	utf8.RuneError          -80
//	'\n', '\r' or '\t'       +2
//	letter or digit          +3
//	any other printable      +1
//	anything else            -2
func scoreDecodedText(s string) int {
	if len(strings.TrimSpace(s)) == 0 {
		return -10000
	}

	total := 0
	for _, ch := range s {
		if ch == utf8.RuneError {
			// Either a literal U+FFFD or a byte that is not valid UTF-8.
			total -= 80
			continue
		}
		if ch == '\t' || ch == '\n' || ch == '\r' {
			total += 2
			continue
		}
		if unicode.IsLetter(ch) || unicode.IsDigit(ch) {
			total += 3
			continue
		}
		if unicode.IsPrint(ch) {
			total++
			continue
		}
		// Remaining runes are non-printable (e.g. other control characters).
		total -= 2
	}
	return total
}
|