324 lines
5.8 KiB
Go
324 lines
5.8 KiB
Go
package sqlruntime
|
|
|
|
import "strings"
|
|
|
|
// FingerprintSQL creates a normalized SQL fingerprint.
|
|
// mode controls literal masking; keepComments controls whether comments are preserved.
|
|
func FingerprintSQL(query string, mode int, keepComments bool) string {
|
|
prepared := query
|
|
if !keepComments {
|
|
prepared = stripSQLComments(prepared)
|
|
}
|
|
|
|
normalized := normalizeSQL(prepared)
|
|
if normalized == "" {
|
|
return ""
|
|
}
|
|
|
|
if NormalizeFingerprintMode(mode) == fingerprintModeMaskLiterals {
|
|
return maskSQLLiterals(normalized, keepComments)
|
|
}
|
|
return normalized
|
|
}
|
|
|
|
// normalizeSQL trims query, lower-cases it and collapses every run of
// whitespace into a single space. It returns "" when query holds no
// non-whitespace characters.
func normalizeSQL(query string) string {
	lowered := strings.ToLower(strings.TrimSpace(query))

	// Fields splits on any whitespace run, so re-joining with one space
	// yields the collapsed form.
	tokens := strings.Fields(lowered)
	if len(tokens) == 0 {
		return ""
	}
	return strings.Join(tokens, " ")
}
|
|
|
|
// stripSQLComments removes "--" line comments and "/* */" block comments from
// query while leaving the contents of single-quoted, double-quoted and
// backtick-quoted sections untouched.
//
// A space is written where a comment opens and another where it closes (the
// terminating newline of a line comment, or the "*/" of a block comment), so
// tokens on either side never fuse together. A comment that is still open at
// the end of the input is dropped without the closing space. Inside single-
// and double-quoted sections a doubled quote character is treated as an
// escaped quote and does not end the section.
func stripSQLComments(query string) string {
	n := len(query)
	if n == 0 {
		return ""
	}

	const (
		inCode = iota
		inSingleQuoted
		inDoubleQuoted
		inBacktickQuoted
		inLineComment
		inBlockComment
	)

	// followedBy reports whether the byte right after pos equals want.
	followedBy := func(pos int, want byte) bool {
		return pos+1 < n && query[pos+1] == want
	}

	var out strings.Builder
	out.Grow(n)
	mode := inCode

	for pos := 0; pos < n; pos++ {
		ch := query[pos]

		switch mode {
		case inCode:
			switch {
			case ch == '\'':
				mode = inSingleQuoted
				out.WriteByte(ch)
			case ch == '"':
				mode = inDoubleQuoted
				out.WriteByte(ch)
			case ch == '`':
				mode = inBacktickQuoted
				out.WriteByte(ch)
			case ch == '-' && followedBy(pos, '-'):
				out.WriteByte(' ')
				pos++ // skip the second '-'
				mode = inLineComment
			case ch == '/' && followedBy(pos, '*'):
				out.WriteByte(' ')
				pos++ // skip the '*'
				mode = inBlockComment
			default:
				out.WriteByte(ch)
			}

		case inSingleQuoted, inDoubleQuoted:
			delim := byte('\'')
			if mode == inDoubleQuoted {
				delim = '"'
			}
			out.WriteByte(ch)
			if ch == delim {
				if followedBy(pos, delim) {
					// Doubled delimiter: an escaped quote, stay inside.
					pos++
					out.WriteByte(delim)
				} else {
					mode = inCode
				}
			}

		case inBacktickQuoted:
			out.WriteByte(ch)
			if ch == '`' {
				mode = inCode
			}

		case inLineComment:
			if ch == '\n' {
				out.WriteByte(' ')
				mode = inCode
			}

		case inBlockComment:
			if ch == '*' && followedBy(pos, '/') {
				out.WriteByte(' ')
				pos++ // skip the '/'
				mode = inCode
			}
		}
	}

	return out.String()
}
|
|
|
|
func maskSQLLiterals(query string, keepComments bool) string {
|
|
if query == "" {
|
|
return ""
|
|
}
|
|
|
|
const (
|
|
stateNormal = iota
|
|
stateSingleQuote
|
|
stateDoubleQuote
|
|
stateBacktick
|
|
stateLineComment
|
|
stateBlockComment
|
|
)
|
|
|
|
var b strings.Builder
|
|
b.Grow(len(query))
|
|
state := stateNormal
|
|
|
|
for i := 0; i < len(query); i++ {
|
|
c := query[i]
|
|
|
|
switch state {
|
|
case stateNormal:
|
|
if c == '\'' {
|
|
b.WriteByte('?')
|
|
state = stateSingleQuote
|
|
continue
|
|
}
|
|
if c == '"' {
|
|
b.WriteByte(c)
|
|
state = stateDoubleQuote
|
|
continue
|
|
}
|
|
if c == '`' {
|
|
b.WriteByte(c)
|
|
state = stateBacktick
|
|
continue
|
|
}
|
|
if c == '-' && i+1 < len(query) && query[i+1] == '-' {
|
|
if keepComments {
|
|
b.WriteByte(c)
|
|
i++
|
|
b.WriteByte(query[i])
|
|
} else {
|
|
b.WriteByte(' ')
|
|
i++
|
|
}
|
|
state = stateLineComment
|
|
continue
|
|
}
|
|
if c == '/' && i+1 < len(query) && query[i+1] == '*' {
|
|
if keepComments {
|
|
b.WriteByte(c)
|
|
i++
|
|
b.WriteByte(query[i])
|
|
} else {
|
|
b.WriteByte(' ')
|
|
i++
|
|
}
|
|
state = stateBlockComment
|
|
continue
|
|
}
|
|
if c == '$' {
|
|
j := i + 1
|
|
for j < len(query) && isDigit(query[j]) {
|
|
j++
|
|
}
|
|
if j > i+1 {
|
|
b.WriteByte('?')
|
|
i = j - 1
|
|
continue
|
|
}
|
|
}
|
|
if c == '-' && i+1 < len(query) && isDigit(query[i+1]) && isNumberBoundaryBefore(query, i) {
|
|
j := scanNumber(query, i+1)
|
|
if isNumberBoundaryAfter(query, j) {
|
|
b.WriteByte('?')
|
|
i = j - 1
|
|
continue
|
|
}
|
|
}
|
|
if isDigit(c) && isNumberBoundaryBefore(query, i) {
|
|
j := scanNumber(query, i)
|
|
if isNumberBoundaryAfter(query, j) {
|
|
b.WriteByte('?')
|
|
i = j - 1
|
|
continue
|
|
}
|
|
}
|
|
b.WriteByte(c)
|
|
|
|
case stateSingleQuote:
|
|
if c == '\'' {
|
|
if i+1 < len(query) && query[i+1] == '\'' {
|
|
i++
|
|
continue
|
|
}
|
|
state = stateNormal
|
|
}
|
|
|
|
case stateDoubleQuote:
|
|
b.WriteByte(c)
|
|
if c == '"' {
|
|
if i+1 < len(query) && query[i+1] == '"' {
|
|
i++
|
|
b.WriteByte(query[i])
|
|
continue
|
|
}
|
|
state = stateNormal
|
|
}
|
|
|
|
case stateBacktick:
|
|
b.WriteByte(c)
|
|
if c == '`' {
|
|
state = stateNormal
|
|
}
|
|
|
|
case stateLineComment:
|
|
if keepComments {
|
|
b.WriteByte(c)
|
|
}
|
|
if c == '\n' {
|
|
if !keepComments {
|
|
b.WriteByte(' ')
|
|
}
|
|
state = stateNormal
|
|
}
|
|
|
|
case stateBlockComment:
|
|
if keepComments {
|
|
b.WriteByte(c)
|
|
}
|
|
if c == '*' && i+1 < len(query) && query[i+1] == '/' {
|
|
if keepComments {
|
|
i++
|
|
b.WriteByte(query[i])
|
|
} else {
|
|
b.WriteByte(' ')
|
|
i++
|
|
}
|
|
state = stateNormal
|
|
}
|
|
}
|
|
}
|
|
|
|
return strings.Join(strings.Fields(b.String()), " ")
|
|
}
|
|
|
|
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isDigit(c byte) bool {
	// Byte subtraction wraps around for anything below '0', so a single
	// unsigned comparison covers both ends of the range.
	return c-'0' < 10
}
|
|
|
|
// isIdentifierChar reports whether c can be part of an unquoted identifier:
// an ASCII letter of either case, an ASCII digit, or an underscore.
//
// Upper-case letters are accepted so the number-boundary checks built on this
// helper stay correct on text that has not been lower-cased (for example
// "T1" must not be seen as the letter T followed by a standalone number).
func isIdentifierChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z',
		c >= 'A' && c <= 'Z',
		c >= '0' && c <= '9',
		c == '_':
		return true
	}
	return false
}
|
|
|
|
func isNumberBoundaryBefore(query string, index int) bool {
|
|
if index <= 0 {
|
|
return true
|
|
}
|
|
prev := query[index-1]
|
|
return !isIdentifierChar(prev) && prev != '$' && prev != '.'
|
|
}
|
|
|
|
func isNumberBoundaryAfter(query string, index int) bool {
|
|
if index >= len(query) {
|
|
return true
|
|
}
|
|
next := query[index]
|
|
return !isIdentifierChar(next) && next != '.'
|
|
}
|
|
|
|
func scanNumber(query string, start int) int {
|
|
i := start
|
|
for i < len(query) && isDigit(query[i]) {
|
|
i++
|
|
}
|
|
if i < len(query) && query[i] == '.' {
|
|
i++
|
|
for i < len(query) && isDigit(query[i]) {
|
|
i++
|
|
}
|
|
}
|
|
if i < len(query) && (query[i] == 'e' || query[i] == 'E') {
|
|
i++
|
|
if i < len(query) && (query[i] == '+' || query[i] == '-') {
|
|
i++
|
|
}
|
|
for i < len(query) && isDigit(query[i]) {
|
|
i++
|
|
}
|
|
}
|
|
return i
|
|
}
|