stardb/internal/sqlruntime/fingerprint.go

324 lines
5.8 KiB
Go
Raw Normal View History

package sqlruntime
import "strings"
// FingerprintSQL creates a normalized SQL fingerprint for query.
//
// The query is lower-cased and every run of whitespace is collapsed to a
// single space. When keepComments is false, "--" line comments and "/* */"
// block comments are removed first. mode controls literal masking: when
// NormalizeFingerprintMode(mode) resolves to fingerprintModeMaskLiterals the
// text is additionally passed through maskSQLLiterals.
//
// It returns "" when the (optionally comment-stripped) query contains nothing
// but whitespace.
func FingerprintSQL(query string, mode int, keepComments bool) string {
	prepared := query
	if !keepComments {
		prepared = stripSQLComments(prepared)
	}
	normalized := normalizeSQL(prepared)
	if normalized == "" {
		return ""
	}
	if NormalizeFingerprintMode(mode) != fingerprintModeMaskLiterals {
		return normalized
	}
	// Mask from the lower-cased but not yet whitespace-collapsed text. A kept
	// "--" comment is terminated by its newline; normalizeSQL has already
	// folded that newline into a space in normalized, which would make the
	// comment run to the end of the statement and leave every later literal
	// unmasked. maskSQLLiterals collapses whitespace itself before returning,
	// so the result is still fully normalized.
	return maskSQLLiterals(strings.ToLower(prepared), keepComments)
}
// normalizeSQL lower-cases query and rewrites every run of whitespace as a
// single space, dropping leading and trailing whitespace entirely. A query
// that is empty or consists only of whitespace yields "".
func normalizeSQL(query string) string {
	// strings.Fields ignores leading/trailing whitespace and returns no
	// fields for blank input, so the join also covers the empty case.
	words := strings.Fields(strings.ToLower(query))
	return strings.Join(words, " ")
}
// stripSQLComments returns query with "--" line comments and "/* */" block
// comments removed.
//
// Comment markers inside single-quoted, double-quoted or backtick-quoted text
// are left alone. Inside single and double quotes a doubled quote character
// is treated as an escaped quote and does not end the quoted section. Each
// comment opener is replaced by one space, and so is each terminator (the
// newline of a line comment, the "*/" of a block comment); an unterminated
// comment therefore contributes only the opener's space.
func stripSQLComments(query string) string {
	if len(query) == 0 {
		return ""
	}

	type lexState uint8
	const (
		inCode lexState = iota
		inSingleQuote
		inDoubleQuote
		inBacktick
		inLineComment
		inBlockComment
	)

	size := len(query)
	// followedBy reports whether the byte right after pos equals want.
	followedBy := func(pos int, want byte) bool {
		return pos+1 < size && query[pos+1] == want
	}

	var out strings.Builder
	out.Grow(size)

	cur := inCode
	for pos := 0; pos < size; pos++ {
		ch := query[pos]

		switch cur {
		case inCode:
			switch {
			case ch == '-' && followedBy(pos, '-'):
				out.WriteByte(' ')
				pos++ // consume the second '-'
				cur = inLineComment
			case ch == '/' && followedBy(pos, '*'):
				out.WriteByte(' ')
				pos++ // consume the '*'
				cur = inBlockComment
			default:
				out.WriteByte(ch)
				switch ch {
				case '\'':
					cur = inSingleQuote
				case '"':
					cur = inDoubleQuote
				case '`':
					cur = inBacktick
				}
			}

		case inSingleQuote, inDoubleQuote:
			out.WriteByte(ch)
			closer := byte('\'')
			if cur == inDoubleQuote {
				closer = '"'
			}
			if ch == closer {
				if followedBy(pos, closer) {
					// Doubled quote: copy the second one and stay inside.
					pos++
					out.WriteByte(closer)
				} else {
					cur = inCode
				}
			}

		case inBacktick:
			out.WriteByte(ch)
			if ch == '`' {
				cur = inCode
			}

		case inLineComment:
			// Comment bytes are dropped; only the newline leaves a trace.
			if ch == '\n' {
				out.WriteByte(' ')
				cur = inCode
			}

		case inBlockComment:
			if ch == '*' && followedBy(pos, '/') {
				out.WriteByte(' ')
				pos++ // consume the '/'
				cur = inCode
			}
		}
	}
	return out.String()
}
// maskSQLLiterals returns query with literal values replaced by "?" and every
// run of whitespace in the result collapsed to a single space.
//
// Each of the following is replaced by a single "?":
//   - a single-quoted string (a doubled ' inside it does not end the string);
//   - "$" immediately followed by one or more digits;
//   - a number as delimited by scanNumber, optionally preceded by "-", when
//     both isNumberBoundaryBefore and isNumberBoundaryAfter accept its position.
//
// Double-quoted and backtick-quoted text is copied through without masking.
// "--" and "/* */" comments are copied through when keepComments is true and
// replaced by spaces otherwise; nothing inside a comment is masked. Because
// the "--" check runs before the negative-number check, "--5" starts a
// comment rather than a number.
func maskSQLLiterals(query string, keepComments bool) string {
if query == "" {
return ""
}
// Lexical context of the byte currently being examined.
const (
stateNormal = iota
stateSingleQuote
stateDoubleQuote
stateBacktick
stateLineComment
stateBlockComment
)
var b strings.Builder
b.Grow(len(query))
state := stateNormal
for i := 0; i < len(query); i++ {
c := query[i]
switch state {
case stateNormal:
// Start of a string literal: emit the one "?" that stands for the whole
// literal; its contents are discarded in stateSingleQuote.
if c == '\'' {
b.WriteByte('?')
state = stateSingleQuote
continue
}
// Double-quoted and backtick-quoted sections are copied byte for byte.
if c == '"' {
b.WriteByte(c)
state = stateDoubleQuote
continue
}
if c == '`' {
b.WriteByte(c)
state = stateBacktick
continue
}
// "--" opens a line comment: keep the marker, or write a space in its place.
if c == '-' && i+1 < len(query) && query[i+1] == '-' {
if keepComments {
b.WriteByte(c)
i++
b.WriteByte(query[i])
} else {
b.WriteByte(' ')
i++
}
state = stateLineComment
continue
}
// "/*" opens a block comment, handled the same way as "--" above.
if c == '/' && i+1 < len(query) && query[i+1] == '*' {
if keepComments {
b.WriteByte(c)
i++
b.WriteByte(query[i])
} else {
b.WriteByte(' ')
i++
}
state = stateBlockComment
continue
}
// "$" followed by digits becomes "?"; a "$" with no digits after it falls
// through and is copied as an ordinary byte.
if c == '$' {
j := i + 1
for j < len(query) && isDigit(query[j]) {
j++
}
if j > i+1 {
b.WriteByte('?')
i = j - 1
continue
}
}
// A "-" directly followed by a digit is folded into the number, but only
// when the "-" itself sits at a number boundary; otherwise the "-" is copied
// and the digits are considered on their own in a later iteration.
if c == '-' && i+1 < len(query) && isDigit(query[i+1]) && isNumberBoundaryBefore(query, i) {
j := scanNumber(query, i+1)
if isNumberBoundaryAfter(query, j) {
b.WriteByte('?')
i = j - 1
continue
}
}
// Unsigned number: masked only when bounded on both sides.
if isDigit(c) && isNumberBoundaryBefore(query, i) {
j := scanNumber(query, i)
if isNumberBoundaryAfter(query, j) {
b.WriteByte('?')
i = j - 1
continue
}
}
b.WriteByte(c)
case stateSingleQuote:
// Nothing is written in this state: the opening quote already produced the
// "?". A doubled quote is skipped and keeps the literal open.
if c == '\'' {
if i+1 < len(query) && query[i+1] == '\'' {
i++
continue
}
state = stateNormal
}
case stateDoubleQuote:
b.WriteByte(c)
// A doubled double-quote is copied and does not end the quoted section.
if c == '"' {
if i+1 < len(query) && query[i+1] == '"' {
i++
b.WriteByte(query[i])
continue
}
state = stateNormal
}
case stateBacktick:
b.WriteByte(c)
if c == '`' {
state = stateNormal
}
case stateLineComment:
if keepComments {
b.WriteByte(c)
}
// Only a newline ends a line comment. When comments are kept the newline
// was already copied above; otherwise a space is written in its place.
if c == '\n' {
if !keepComments {
b.WriteByte(' ')
}
state = stateNormal
}
case stateBlockComment:
if keepComments {
b.WriteByte(c)
}
// "*/" ends the block comment: finish copying it, or write a space instead.
if c == '*' && i+1 < len(query) && query[i+1] == '/' {
if keepComments {
i++
b.WriteByte(query[i])
} else {
b.WriteByte(' ')
i++
}
state = stateNormal
}
}
}
// Collapse whitespace runs, including the filler spaces written for removed
// comments, and drop leading/trailing whitespace.
return strings.Join(strings.Fields(b.String()), " ")
}
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so any value below '0' turns into a
	// large number and fails the single comparison.
	return c-'0' <= 9
}
// isIdentifierChar reports whether c can be part of an unquoted identifier:
// an ASCII letter of either case, an ASCII digit, or '_'.
//
// Upper-case letters are accepted so that the number-boundary checks built on
// this function do not mistake the digit in text such as "T1" for a
// standalone numeric literal when the input has not been lower-cased first.
// For lower-cased input the result is the same as before.
func isIdentifierChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z',
		c >= 'A' && c <= 'Z',
		c >= '0' && c <= '9',
		c == '_':
		return true
	default:
		return false
	}
}
// isNumberBoundaryBefore reports whether a numeric literal may begin at
// index: either index is at or before the start of query, or the byte just
// before it is not an identifier character, '$', or '.'.
func isNumberBoundaryBefore(query string, index int) bool {
	if index < 1 {
		return true
	}
	switch before := query[index-1]; before {
	case '$', '.':
		// "$1" is a placeholder and ".5" continues a dotted token.
		return false
	default:
		return !isIdentifierChar(before)
	}
}
// isNumberBoundaryAfter reports whether a numeric literal may end just before
// index: either index is at or past the end of query, or the byte at index is
// neither an identifier character nor '.'.
func isNumberBoundaryAfter(query string, index int) bool {
	if index >= len(query) {
		return true
	}
	following := query[index]
	if following == '.' {
		return false
	}
	return !isIdentifierChar(following)
}
// scanNumber returns the index just past the numeric token that begins at
// start in query.
//
// The token is a run of digits, optionally followed by '.' and more digits,
// optionally followed by an exponent: 'e' or 'E', an optional '+' or '-', and
// a run of digits. Every part may be empty; in particular the exponent marker
// and its sign are consumed even when no digits follow them. If no byte
// matches, start is returned unchanged.
func scanNumber(query string, start int) int {
	end := len(query)
	// digitsFrom returns the index of the first non-digit at or after pos.
	digitsFrom := func(pos int) int {
		for pos < end && isDigit(query[pos]) {
			pos++
		}
		return pos
	}

	pos := digitsFrom(start)
	if pos < end && query[pos] == '.' {
		pos = digitsFrom(pos + 1)
	}
	if pos < end && (query[pos] == 'e' || query[pos] == 'E') {
		pos++
		if pos < end && (query[pos] == '+' || query[pos] == '-') {
			pos++
		}
		pos = digitsFrom(pos)
	}
	return pos
}