support binlog compression

master
兔子 1 year ago
parent 39ca67fcfe
commit 8b0423eb94

@ -21,6 +21,7 @@ type TxDetail struct {
Db string Db string
Table string Table string
SqlType string SqlType string
CompressionType string
Rows [][]interface{} Rows [][]interface{}
} }
@ -80,13 +81,7 @@ func parseBinlogDetail(r io.Reader, f func(Transaction)) error {
var ( var (
err error err error
n int64 n int64
db string = ""
tb string = ""
sql string = ""
sqlType string = ""
rowCnt uint32 = 0
tbMapPos uint32 = 0 tbMapPos uint32 = 0
rows [][]interface{}
) )
var tx Transaction var tx Transaction
currentGtid := "" currentGtid := ""
@ -157,9 +152,10 @@ func parseBinlogDetail(r io.Reader, f func(Transaction)) error {
//binEvent := &replication.BinlogEvent{RawData: rawData, Header: h, Event: e} //binEvent := &replication.BinlogEvent{RawData: rawData, Header: h, Event: e}
binEvent := &replication.BinlogEvent{Header: h, Event: e} // we donnot need raw data binEvent := &replication.BinlogEvent{Header: h, Event: e} // we donnot need raw data
db, tb, sqlType, sql, rowCnt, rows = GetDbTbAndQueryAndRowCntFromBinevent(binEvent) evs := ParseBinlogEvent(binEvent)
for _, ev := range evs {
startPos := 0 startPos := 0
if sqlType == "query" || sqlType == "gtid" { if ev.Type == "query" || ev.Type == "gtid" {
startPos = int(h.LogPos - h.EventSize) startPos = int(h.LogPos - h.EventSize)
//fmt.Println(h.Timestamp, h.LogPos-h.EventSize, h.LogPos, db, tb, "sql="+sql, rowCnt, sqlType) //fmt.Println(h.Timestamp, h.LogPos-h.EventSize, h.LogPos, db, tb, "sql="+sql, rowCnt, sqlType)
// cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: h.LogPos - h.EventSize, StopPos: h.LogPos, // cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: h.LogPos - h.EventSize, StopPos: h.LogPos,
@ -170,7 +166,7 @@ func parseBinlogDetail(r io.Reader, f func(Transaction)) error {
// cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: tbMapPos, StopPos: h.LogPos, // cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: tbMapPos, StopPos: h.LogPos,
// Database: db, Table: tb, QuerySql: sql, RowCnt: rowCnt, QueryType: sqlType} // Database: db, Table: tb, QuerySql: sql, RowCnt: rowCnt, QueryType: sqlType}
} }
switch sqlType { switch ev.Type {
case "gtid": case "gtid":
if currentGtid != "" { if currentGtid != "" {
idx := 0 idx := 0
@ -187,9 +183,9 @@ func parseBinlogDetail(r io.Reader, f func(Transaction)) error {
f(tx) f(tx)
} }
} }
currentGtid = sql currentGtid = ev.Data
tx = Transaction{ tx = Transaction{
GTID: sql, GTID: ev.Data,
StartPos: startPos, StartPos: startPos,
Timestamp: int64(h.Timestamp), Timestamp: int64(h.Timestamp),
Time: time.Unix(int64(h.Timestamp), 0), Time: time.Unix(int64(h.Timestamp), 0),
@ -199,58 +195,62 @@ func parseBinlogDetail(r io.Reader, f func(Transaction)) error {
continue continue
case "rowsquery": case "rowsquery":
tx.EndPos = int(h.LogPos) tx.EndPos = int(h.LogPos)
tx.sqlOrigin = append(tx.sqlOrigin, sql) tx.sqlOrigin = append(tx.sqlOrigin, ev.Data)
default: default:
tx.EndPos = int(h.LogPos) tx.EndPos = int(h.LogPos)
tx.Txs = append(tx.Txs, TxDetail{ tx.Txs = append(tx.Txs, TxDetail{
StartPos: startPos, StartPos: startPos,
EndPos: int(h.LogPos), EndPos: int(h.LogPos),
Db: db, Db: ev.DB,
Table: tb, Table: ev.TB,
Sql: sql, Sql: ev.Data,
SqlType: sqlType, SqlType: ev.Type,
Rows: rows, Rows: ev.Rows,
RowCount: int(rowCnt), RowCount: int(ev.RowCnt),
Timestamp: int64(h.Timestamp), Timestamp: int64(h.Timestamp),
Time: time.Unix(int64(h.Timestamp), 0), Time: time.Unix(int64(h.Timestamp), 0),
CompressionType: ev.CompressionType,
}) })
} }
} }
}
} }
func GetDbTbAndQueryAndRowCntFromBinevent(ev *replication.BinlogEvent) (string, string, string, string, uint32, [][]interface{}) { type BinlogEvent struct {
var ( Type string
db string = "" DB string
tb string = "" TB string
sql string = "" Data string
sqlType string = "" RowCnt uint32
rowCnt uint32 = 0 Rows [][]interface{}
rows [][]interface{} CompressionType string
) }
func ParseBinlogEvent(ev *replication.BinlogEvent) []BinlogEvent {
var res []BinlogEvent
var sig BinlogEvent
switch ev.Header.EventType { switch ev.Header.EventType {
case replication.ANONYMOUS_GTID_EVENT: case replication.ANONYMOUS_GTID_EVENT:
//ge := ev.Event.(*replication.GTIDEvent) //ge := ev.Event.(*replication.GTIDEvent)
sql = "anonymous-gtid-event:1" sig.Data = "anonymous-gtid-event:1"
sqlType = "gtid" sig.Type = "gtid"
case replication.WRITE_ROWS_EVENTv1, case replication.WRITE_ROWS_EVENTv1,
replication.WRITE_ROWS_EVENTv2: replication.WRITE_ROWS_EVENTv2:
wrEvent := ev.Event.(*replication.RowsEvent) wrEvent := ev.Event.(*replication.RowsEvent)
db = string(wrEvent.Table.Schema) sig.TB = string(wrEvent.Table.Schema)
tb = string(wrEvent.Table.Table) sig.TB = string(wrEvent.Table.Table)
sqlType = "insert" sig.Type = "insert"
rowCnt = uint32(len(wrEvent.Rows)) sig.RowCnt = uint32(len(wrEvent.Rows))
rows = wrEvent.Rows sig.Rows = wrEvent.Rows
case replication.UPDATE_ROWS_EVENTv1, case replication.UPDATE_ROWS_EVENTv1,
replication.UPDATE_ROWS_EVENTv2: replication.UPDATE_ROWS_EVENTv2:
wrEvent := ev.Event.(*replication.RowsEvent) wrEvent := ev.Event.(*replication.RowsEvent)
db = string(wrEvent.Table.Schema) sig.DB = string(wrEvent.Table.Schema)
tb = string(wrEvent.Table.Table) sig.DB = string(wrEvent.Table.Table)
sqlType = "update" sig.Type = "update"
rowCnt = uint32(len(wrEvent.Rows)) / 2 sig.RowCnt = uint32(len(wrEvent.Rows)) / 2
rows = wrEvent.Rows sig.Rows = wrEvent.Rows
case replication.DELETE_ROWS_EVENTv1, case replication.DELETE_ROWS_EVENTv1,
replication.DELETE_ROWS_EVENTv2: replication.DELETE_ROWS_EVENTv2:
@ -258,41 +258,54 @@ func GetDbTbAndQueryAndRowCntFromBinevent(ev *replication.BinlogEvent) (string,
//replication.TABLE_MAP_EVENT: //replication.TABLE_MAP_EVENT:
wrEvent := ev.Event.(*replication.RowsEvent) wrEvent := ev.Event.(*replication.RowsEvent)
db = string(wrEvent.Table.Schema) sig.DB = string(wrEvent.Table.Schema)
tb = string(wrEvent.Table.Table) sig.TB = string(wrEvent.Table.Table)
sqlType = "delete" sig.Type = "delete"
rowCnt = uint32(len(wrEvent.Rows)) sig.RowCnt = uint32(len(wrEvent.Rows))
rows = wrEvent.Rows sig.Rows = wrEvent.Rows
case replication.ROWS_QUERY_EVENT: case replication.ROWS_QUERY_EVENT:
queryEvent := ev.Event.(*replication.RowsQueryEvent) queryEvent := ev.Event.(*replication.RowsQueryEvent)
sql = string(queryEvent.Query) sig.Data = string(queryEvent.Query)
sqlType = "rowsquery" sig.Type = "rowsquery"
case replication.QUERY_EVENT: case replication.QUERY_EVENT:
queryEvent := ev.Event.(*replication.QueryEvent) queryEvent := ev.Event.(*replication.QueryEvent)
db = string(queryEvent.Schema) sig.DB = string(queryEvent.Schema)
sql = string(queryEvent.Query) sig.Data = string(queryEvent.Query)
sqlType = "query" sig.Type = "query"
case replication.MARIADB_GTID_EVENT: case replication.MARIADB_GTID_EVENT:
// For global transaction ID, used to start a new transaction event group, instead of the old BEGIN query event, and also to mark stand-alone (ddl). // For global transaction ID, used to start a new transaction event group, instead of the old BEGIN query event, and also to mark stand-alone (ddl).
//https://mariadb.com/kb/en/library/gtid_event/ //https://mariadb.com/kb/en/library/gtid_event/
sql = "begin" sig.Data = "begin"
sqlType = "query" sig.Type = "query"
case replication.XID_EVENT: case replication.XID_EVENT:
// XID_EVENT represents commit。rollback transaction not in binlog // XID_EVENT represents commit。rollback transaction not in binlog
sql = "commit" sig.Data = "commit"
sqlType = "query" sig.Type = "query"
case replication.GTID_EVENT: case replication.GTID_EVENT:
ge := ev.Event.(*replication.GTIDEvent) ge := ev.Event.(*replication.GTIDEvent)
gid, err := gtid.Parse(fmt.Sprintf("%s:%d", bytesToUuid(ge.SID), ge.GNO)) gid, err := gtid.Parse(fmt.Sprintf("%s:%d", bytesToUuid(ge.SID), ge.GNO))
if err == nil { if err == nil {
sql = gid.String() sig.Data = gid.String()
} }
sqlType = "gtid" sig.Type = "gtid"
case replication.TRANSACTION_PAYLOAD_EVENT:
ge := ev.Event.(*replication.TransactionPayloadEvent)
for _, val := range ge.Events {
res = append(res, ParseBinlogEvent(val)...)
} }
return db, tb, sqlType, sql, rowCnt, rows for idx := range res {
if ge.CompressionType == 0 {
res[idx].CompressionType = "ZSTD"
} else if ge.CompressionType != 255 {
res[idx].CompressionType = "UNKNOWN"
}
}
return res
}
res = append(res, sig)
return res
} }
type BinlogFilter struct { type BinlogFilter struct {
@ -304,6 +317,7 @@ type BinlogFilter struct {
EndDate time.Time EndDate time.Time
BigThan int BigThan int
SmallThan int SmallThan int
OnlyShowGtid bool
} }
func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter BinlogFilter, fn func(Transaction)) error { func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter BinlogFilter, fn func(Transaction)) error {
@ -325,14 +339,8 @@ func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter
// process: 0, continue: 1, break: 2, EOF: 3 // process: 0, continue: 1, break: 2, EOF: 3
var ( var (
n int64 n int64
db string = ""
tb string = ""
sql string = ""
sqlType string = ""
rowCnt uint32 = 0
tbMapPos uint32 = 0 tbMapPos uint32 = 0
skipTillNext bool = false skipTillNext bool = false
rows [][]interface{}
) )
var tx Transaction var tx Transaction
@ -398,6 +406,9 @@ func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter
if skipTillNext && h.EventType != replication.GTID_EVENT { if skipTillNext && h.EventType != replication.GTID_EVENT {
continue continue
} }
if filter.OnlyShowGtid && h.EventType != replication.GTID_EVENT {
continue
}
//h.Dump(os.Stdout) //h.Dump(os.Stdout)
data := buf.Bytes() data := buf.Bytes()
@ -426,9 +437,10 @@ func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter
//binEvent := &replication.BinlogEvent{RawData: rawData, Header: h, Event: e} //binEvent := &replication.BinlogEvent{RawData: rawData, Header: h, Event: e}
binEvent := &replication.BinlogEvent{Header: h, Event: e} // we donnot need raw data binEvent := &replication.BinlogEvent{Header: h, Event: e} // we donnot need raw data
db, tb, sqlType, sql, rowCnt, rows = GetDbTbAndQueryAndRowCntFromBinevent(binEvent) evs := ParseBinlogEvent(binEvent)
for _, ev := range evs {
startPos := 0 startPos := 0
if sqlType == "query" || sqlType == "gtid" { if ev.Type == "query" || ev.Type == "gtid" {
startPos = int(h.LogPos - h.EventSize) startPos = int(h.LogPos - h.EventSize)
//fmt.Println(h.Timestamp, h.LogPos-h.EventSize, h.LogPos, db, tb, "sql="+sql, rowCnt, sqlType) //fmt.Println(h.Timestamp, h.LogPos-h.EventSize, h.LogPos, db, tb, "sql="+sql, rowCnt, sqlType)
// cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: h.LogPos - h.EventSize, StopPos: h.LogPos, // cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: h.LogPos - h.EventSize, StopPos: h.LogPos,
@ -439,7 +451,7 @@ func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter
// cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: tbMapPos, StopPos: h.LogPos, // cfg.StatChan <- BinEventStats{Timestamp: h.Timestamp, Binlog: *binlog, StartPos: tbMapPos, StopPos: h.LogPos,
// Database: db, Table: tb, QuerySql: sql, RowCnt: rowCnt, QueryType: sqlType} // Database: db, Table: tb, QuerySql: sql, RowCnt: rowCnt, QueryType: sqlType}
} }
switch sqlType { switch ev.Type {
case "gtid": case "gtid":
if skipTillNext { if skipTillNext {
skipTillNext = false skipTillNext = false
@ -457,23 +469,23 @@ func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter
tx.Size = tx.EndPos - tx.StartPos tx.Size = tx.EndPos - tx.StartPos
callFn(tx) callFn(tx)
} }
currentGtid = sql currentGtid = ev.Data
if inGtid != nil { if inGtid != nil {
if c, _ := inGtid.Contain(sql); !c { if c, _ := inGtid.Contain(ev.Data); !c {
currentGtid = "" currentGtid = ""
skipTillNext = true skipTillNext = true
continue continue
} }
} }
if exGtid != nil { if exGtid != nil {
if c, _ := exGtid.Contain(sql); c { if c, _ := exGtid.Contain(ev.Data); c {
currentGtid = "" currentGtid = ""
skipTillNext = true skipTillNext = true
continue continue
} }
} }
tx = Transaction{ tx = Transaction{
GTID: sql, GTID: ev.Data,
StartPos: startPos, StartPos: startPos,
Timestamp: int64(h.Timestamp), Timestamp: int64(h.Timestamp),
Time: time.Unix(int64(h.Timestamp), 0), Time: time.Unix(int64(h.Timestamp), 0),
@ -483,24 +495,26 @@ func parseBinlogWithFilter(r io.Reader, parse *replication.BinlogParser, filter
continue continue
case "rowsquery": case "rowsquery":
tx.EndPos = int(h.LogPos) tx.EndPos = int(h.LogPos)
tx.sqlOrigin = append(tx.sqlOrigin, sql) tx.sqlOrigin = append(tx.sqlOrigin, ev.Data)
default: default:
tx.EndPos = int(h.LogPos) tx.EndPos = int(h.LogPos)
tx.Txs = append(tx.Txs, TxDetail{ tx.Txs = append(tx.Txs, TxDetail{
StartPos: startPos, StartPos: startPos,
EndPos: int(h.LogPos), EndPos: int(h.LogPos),
Db: db, Db: ev.DB,
Table: tb, Table: ev.TB,
Sql: sql, Sql: ev.Data,
SqlType: sqlType, SqlType: ev.Type,
Rows: rows, Rows: ev.Rows,
RowCount: int(rowCnt), RowCount: int(ev.RowCnt),
Timestamp: int64(h.Timestamp), Timestamp: int64(h.Timestamp),
Time: time.Unix(int64(h.Timestamp), 0), Time: time.Unix(int64(h.Timestamp), 0),
CompressionType: ev.CompressionType,
}) })
} }
} }
} }
}
func ParseBinlogWithFilter(path string, pos int64, filter BinlogFilter, fx func(Transaction)) error { func ParseBinlogWithFilter(path string, pos int64, filter BinlogFilter, fx func(Transaction)) error {
defer func() { defer func() {

@ -2,6 +2,7 @@ package binlog
import ( import (
"fmt" "fmt"
"os"
"testing" "testing"
"time" "time"
) )
@ -26,3 +27,15 @@ func TestParseFilter(t *testing.T) {
fmt.Println(transaction) fmt.Println(transaction)
}) })
} }
func TestParseExternal(t *testing.T) {
file := `C:\mysql-bin.000001`
if _, err := os.Stat(file); err != nil {
return
}
ParseBinlogWithFilter(file, 0, BinlogFilter{
OnlyShowGtid: true,
}, func(transaction Transaction) {
fmt.Println(transaction)
})
}

Loading…
Cancel
Save