402 lines
10 KiB
Markdown
402 lines
10 KiB
Markdown
# WriteBufferHandler 接口实现设计方案
|
||
|
||
本文档描述了 Bolt 和 Memory 引擎如何实现 `WriteBufferHandler` 接口的设计方案,以提高时序数据库的写入性能。
|
||
|
||
## 背景
|
||
|
||
当前的时序数据库实现中,`WriteBufferHandler` 接口和 `WriteBuffer` 结构体已经定义,但 Bolt 和 Memory 引擎尚未实现该接口。通过实现这个接口,我们可以利用缓冲机制来提高写入性能,特别是在高并发场景下。
|
||
|
||
## 设计目标
|
||
|
||
1. 实现 `WriteBufferHandler` 接口,包括 `WriteToBuffer`、`FlushBuffer` 和 `ValidatePoint` 方法
|
||
2. 优化批量写入性能,减少 I/O 操作
|
||
3. 确保数据一致性和错误处理
|
||
4. 支持高并发写入(>500K ops/sec)
|
||
|
||
## 1. Bolt 引擎实现
|
||
|
||
Bolt 是一个基于文件的键值存储,通常使用事务来管理写入操作。以下是 Bolt 引擎实现 `WriteBufferHandler` 的方案:
|
||
|
||
```go
|
||
// pkg/engine/bolt/bolt_write.go
|
||
|
||
package bolt
|
||
|
||
import (
|
||
"encoding/binary"
|
||
"encoding/json"
|
||
"fmt"
|
||
"sync"
|
||
"time"
|
||
|
||
"git.pyer.club/kingecg/gotidb/pkg/engine"
|
||
bolt "go.etcd.io/bbolt"
|
||
)
|
||
|
||
// 确保 BoltEngine 实现了 WriteBufferHandler 接口
|
||
var _ engine.WriteBufferHandler = (*BoltEngine)(nil)
|
||
|
||
// 临时存储结构,用于在事务提交前缓存数据点
|
||
type writeCache struct {
|
||
points map[string][]engine.DataPoint // 按序列ID分组的数据点
|
||
mu sync.Mutex
|
||
}
|
||
|
||
// 初始化写缓存
|
||
func (b *BoltEngine) initWriteCache() {
|
||
b.writeCache = &writeCache{
|
||
points: make(map[string][]engine.DataPoint),
|
||
}
|
||
}
|
||
|
||
// WriteToBuffer 实现 WriteBufferHandler 接口
|
||
// 将数据点添加到临时缓存,不立即写入数据库
|
||
func (b *BoltEngine) WriteToBuffer(point engine.DataPoint) error {
|
||
// 验证引擎状态
|
||
if !b.opened || b.closed {
|
||
return fmt.Errorf("bolt engine not open")
|
||
}
|
||
|
||
// 验证数据点
|
||
if err := b.validateDataPoint(point); err != nil {
|
||
return err
|
||
}
|
||
|
||
// 获取序列ID
|
||
seriesID := point.GetSeriesID()
|
||
|
||
// 添加到临时缓存
|
||
b.writeCache.mu.Lock()
|
||
defer b.writeCache.mu.Unlock()
|
||
|
||
if b.writeCache.points == nil {
|
||
b.writeCache.points = make(map[string][]engine.DataPoint)
|
||
}
|
||
|
||
b.writeCache.points[seriesID] = append(b.writeCache.points[seriesID], point)
|
||
|
||
return nil
|
||
}
|
||
|
||
// FlushBuffer 实现 WriteBufferHandler 接口
|
||
// 将临时缓存中的数据点写入数据库
|
||
func (b *BoltEngine) FlushBuffer() error {
|
||
// 验证引擎状态
|
||
if !b.opened || b.closed {
|
||
return fmt.Errorf("bolt engine not open")
|
||
}
|
||
|
||
// 获取并清空临时缓存
|
||
b.writeCache.mu.Lock()
|
||
points := b.writeCache.points
|
||
b.writeCache.points = make(map[string][]engine.DataPoint)
|
||
b.writeCache.mu.Unlock()
|
||
|
||
// 如果没有数据点,直接返回
|
||
if len(points) == 0 {
|
||
return nil
|
||
}
|
||
|
||
// 开始写入事务
|
||
return b.db.Update(func(tx *bolt.Tx) error {
|
||
// 获取或创建索引桶
|
||
indexBucket, err := tx.CreateBucketIfNotExists([]byte(indexBucketName))
|
||
if err != nil {
|
||
return fmt.Errorf("failed to create index bucket: %v", err)
|
||
}
|
||
|
||
// 按序列处理数据点
|
||
for seriesID, seriesPoints := range points {
|
||
// 获取或创建序列桶
|
||
bucketName := seriesBucketPrefix + seriesID
|
||
bucket, err := tx.CreateBucketIfNotExists([]byte(bucketName))
|
||
if err != nil {
|
||
return fmt.Errorf("failed to create series bucket: %v", err)
|
||
}
|
||
|
||
// 更新索引
|
||
if err := indexBucket.Put([]byte(seriesID), []byte{1}); err != nil {
|
||
return fmt.Errorf("failed to update index: %v", err)
|
||
}
|
||
|
||
// 写入数据点
|
||
for _, point := range seriesPoints {
|
||
// 序列化数据点
|
||
data, err := json.Marshal(point)
|
||
if err != nil {
|
||
return fmt.Errorf("failed to marshal data point: %v", err)
|
||
}
|
||
|
||
// 使用时间戳作为键
|
||
key := make([]byte, 8)
|
||
binary.BigEndian.PutUint64(key, uint64(point.Timestamp))
|
||
|
||
// 写入数据
|
||
if err := bucket.Put(key, data); err != nil {
|
||
return fmt.Errorf("failed to write data point: %v", err)
|
||
}
|
||
|
||
// 更新统计信息
|
||
b.stats.PointsCount++
|
||
}
|
||
}
|
||
|
||
// 更新统计信息
|
||
b.stats.LastWriteTime = time.Now()
|
||
|
||
return nil
|
||
})
|
||
}
|
||
|
||
// ValidatePoint 实现 WriteBufferHandler 接口
|
||
// 验证数据点是否可以写入,但不实际写入
|
||
func (b *BoltEngine) ValidatePoint(point engine.DataPoint) error {
|
||
// 验证引擎状态
|
||
if !b.opened || b.closed {
|
||
return fmt.Errorf("bolt engine not open")
|
||
}
|
||
|
||
// 验证数据点
|
||
return b.validateDataPoint(point)
|
||
}
|
||
|
||
// validateDataPoint 验证数据点
|
||
func (b *BoltEngine) validateDataPoint(point engine.DataPoint) error {
|
||
// 验证时间戳
|
||
if point.Timestamp <= 0 {
|
||
return fmt.Errorf("invalid timestamp: %d", point.Timestamp)
|
||
}
|
||
|
||
// 验证标签
|
||
if len(point.Labels) == 0 {
|
||
return fmt.Errorf("data point must have at least one label")
|
||
}
|
||
|
||
return nil
|
||
}
|
||
```
|
||
|
||
## 2. Memory 引擎实现
|
||
|
||
Memory 引擎将数据存储在内存中,通常使用 map 和锁来管理数据。以下是 Memory 引擎实现 `WriteBufferHandler` 的方案:
|
||
|
||
```go
|
||
// pkg/engine/memory/memory_write.go
|
||
|
||
package memory
|
||
|
||
import (
|
||
"fmt"
|
||
"math"
|
||
"sync"
|
||
"time"
|
||
|
||
"git.pyer.club/kingecg/gotidb/pkg/engine"
|
||
)
|
||
|
||
// 确保 MemoryEngine 实现了 WriteBufferHandler 接口
|
||
var _ engine.WriteBufferHandler = (*MemoryEngine)(nil)
|
||
|
||
// 临时写缓存
|
||
type memWriteCache struct {
|
||
points map[string][]engine.DataPoint
|
||
mu sync.Mutex
|
||
}
|
||
|
||
// 初始化写缓存
|
||
func (m *MemoryEngine) initWriteCache() {
|
||
m.writeCache = &memWriteCache{
|
||
points: make(map[string][]engine.DataPoint),
|
||
}
|
||
}
|
||
|
||
// WriteToBuffer 实现 WriteBufferHandler 接口
|
||
// 将数据点添加到临时缓存
|
||
func (m *MemoryEngine) WriteToBuffer(point engine.DataPoint) error {
|
||
// 验证引擎状态
|
||
if !m.opened || m.closed {
|
||
return fmt.Errorf("memory engine not open")
|
||
}
|
||
|
||
// 验证数据点
|
||
if err := m.validateDataPoint(point); err != nil {
|
||
return err
|
||
}
|
||
|
||
// 获取序列ID
|
||
seriesID := point.GetSeriesID()
|
||
|
||
// 添加到临时缓存
|
||
m.writeCache.mu.Lock()
|
||
defer m.writeCache.mu.Unlock()
|
||
|
||
if m.writeCache.points == nil {
|
||
m.writeCache.points = make(map[string][]engine.DataPoint)
|
||
}
|
||
|
||
m.writeCache.points[seriesID] = append(m.writeCache.points[seriesID], point)
|
||
|
||
return nil
|
||
}
|
||
|
||
// FlushBuffer 实现 WriteBufferHandler 接口
|
||
// 将临时缓存中的数据点写入内存存储
|
||
func (m *MemoryEngine) FlushBuffer() error {
|
||
// 验证引擎状态
|
||
if !m.opened || m.closed {
|
||
return fmt.Errorf("memory engine not open")
|
||
}
|
||
|
||
// 获取并清空临时缓存
|
||
m.writeCache.mu.Lock()
|
||
points := m.writeCache.points
|
||
m.writeCache.points = make(map[string][]engine.DataPoint)
|
||
m.writeCache.mu.Unlock()
|
||
|
||
// 如果没有数据点,直接返回
|
||
if len(points) == 0 {
|
||
return nil
|
||
}
|
||
|
||
// 写入数据
|
||
m.mu.Lock()
|
||
defer m.mu.Unlock()
|
||
|
||
for seriesID, seriesPoints := range points {
|
||
// 获取或创建序列
|
||
series, exists := m.series[seriesID]
|
||
if !exists {
|
||
series = &memorySeries{
|
||
id: seriesID,
|
||
points: make(map[int64]engine.DataPoint),
|
||
}
|
||
m.series[seriesID] = series
|
||
m.stats.SeriesCount++
|
||
}
|
||
|
||
// 写入数据点
|
||
for _, point := range seriesPoints {
|
||
// 在内存引擎中实现环形队列覆盖机制
|
||
// 如果序列中的数据点数量达到限制,删除最旧的数据点
|
||
if m.maxPointsPerSeries > 0 && len(series.points) >= m.maxPointsPerSeries {
|
||
// 找到最旧的时间戳
|
||
var oldestTimestamp int64 = math.MaxInt64
|
||
for ts := range series.points {
|
||
if ts < oldestTimestamp {
|
||
oldestTimestamp = ts
|
||
}
|
||
}
|
||
// 删除最旧的数据点
|
||
delete(series.points, oldestTimestamp)
|
||
}
|
||
|
||
// 添加新数据点
|
||
series.points[point.Timestamp] = point
|
||
m.stats.PointsCount++
|
||
}
|
||
}
|
||
|
||
// 更新统计信息
|
||
m.stats.LastWriteTime = time.Now()
|
||
|
||
return nil
|
||
}
|
||
|
||
// ValidatePoint 实现 WriteBufferHandler 接口
|
||
// 验证数据点是否可以写入,但不实际写入
|
||
func (m *MemoryEngine) ValidatePoint(point engine.DataPoint) error {
|
||
// 验证引擎状态
|
||
if !m.opened || m.closed {
|
||
return fmt.Errorf("memory engine not open")
|
||
}
|
||
|
||
// 验证数据点
|
||
return m.validateDataPoint(point)
|
||
}
|
||
|
||
// validateDataPoint 验证数据点
|
||
func (m *MemoryEngine) validateDataPoint(point engine.DataPoint) error {
|
||
// 验证时间戳
|
||
if point.Timestamp <= 0 {
|
||
return fmt.Errorf("invalid timestamp: %d", point.Timestamp)
|
||
}
|
||
|
||
// 验证标签
|
||
if len(point.Labels) == 0 {
|
||
return fmt.Errorf("data point must have at least one label")
|
||
}
|
||
|
||
return nil
|
||
}
|
||
```
|
||
|
||
## 3. 集成到引擎的 Write 方法中
|
||
|
||
最后,我们需要修改引擎的 `Write` 方法,使用 `WriteBuffer` 来处理批量写入:
|
||
|
||
```go
|
||
// 以 Bolt 引擎为例
|
||
func (b *BoltEngine) Write(ctx context.Context, points []DataPoint) error {
|
||
// 验证引擎状态
|
||
if !b.opened || b.closed {
|
||
return fmt.Errorf("bolt engine not open")
|
||
}
|
||
|
||
// 创建写缓冲区
|
||
// 缓冲区大小可以根据性能测试调整
|
||
buffer := engine.NewWriteBuffer(b, 1000)
|
||
|
||
// 写入数据点
|
||
for _, point := range points {
|
||
if err := buffer.Write(point); err != nil {
|
||
return fmt.Errorf("failed to write data point: %v", err)
|
||
}
|
||
}
|
||
|
||
// 刷新缓冲区,确保所有数据都被写入
|
||
if err := buffer.Flush(); err != nil {
|
||
return fmt.Errorf("failed to flush buffer: %v", err)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
```
|
||
|
||
## 4. 初始化和清理
|
||
|
||
在引擎的 `Open` 和 `Close` 方法中,我们需要初始化和清理写缓存:
|
||
|
||
```go
|
||
// 以 Bolt 引擎为例
|
||
func (b *BoltEngine) Open() error {
|
||
// 现有的打开逻辑...
|
||
|
||
// 初始化写缓存
|
||
b.initWriteCache()
|
||
|
||
return nil
|
||
}
|
||
|
||
func (b *BoltEngine) Close() error {
|
||
// 现有的关闭逻辑...
|
||
|
||
// 清理写缓存
|
||
b.writeCache = nil
|
||
|
||
return nil
|
||
}
|
||
```
|
||
|
||
## 5. 性能优化建议
|
||
|
||
1. **批量大小调优**:根据实际工作负载调整 `WriteBuffer` 的大小
|
||
2. **并发控制**:使用细粒度锁减少锁竞争
|
||
3. **内存管理**:对于 Memory 引擎,实现数据点过期和清理策略
|
||
4. **监控指标**:添加缓冲区性能指标(如缓冲区命中率、平均批量大小等)
|
||
|
||
## 6. 实现注意事项
|
||
|
||
1. **错误处理**:确保在出错时正确清理资源
|
||
2. **事务管理**:对于 Bolt 引擎,确保事务正确提交或回滚——`db.Update` 在闭包返回 `nil` 时提交、返回错误时回滚,因此内存中的统计信息(如 `PointsCount`)在回滚时也必须一并还原
|
||
3. **并发安全**:确保所有操作都是线程安全的
|
||
4. **内存泄 |