feat(backfill): implement backfill tool for generating missing embeddings

This commit is contained in:
2026-03-26 22:45:28 +02:00
parent 1dde7f233d
commit f4ef0e9163
19 changed files with 575 additions and 37 deletions

View File

@@ -17,6 +17,7 @@ type Config struct {
Search SearchConfig `yaml:"search"`
Logging LoggingConfig `yaml:"logging"`
Observability ObservabilityConfig `yaml:"observability"`
Backfill BackfillConfig `yaml:"backfill"`
}
type ServerConfig struct {
@@ -135,3 +136,12 @@ type ObservabilityConfig struct {
MetricsEnabled bool `yaml:"metrics_enabled"`
PprofEnabled bool `yaml:"pprof_enabled"`
}
// BackfillConfig holds the settings read from the `backfill` section of the
// YAML configuration. Per the accompanying commit, the backfill tool generates
// embeddings that are missing.
//
// Config.Validate only inspects BatchSize and MaxPerRun when Enabled is true.
type BackfillConfig struct {
	// Enabled gates validation of the remaining backfill settings.
	// defaultConfig leaves it false.
	// NOTE(review): presumably it also switches the backfill job itself on —
	// confirm against the consumer of this struct.
	Enabled bool `yaml:"enabled"`

	// RunOnStartup defaults to false.
	// NOTE(review): presumably triggers one backfill run at process start —
	// confirm against the consumer of this struct.
	RunOnStartup bool `yaml:"run_on_startup"`

	// Interval defaults to 15 minutes.
	// NOTE(review): presumably the delay between periodic runs — confirm.
	Interval time.Duration `yaml:"interval"`

	// BatchSize must be greater than zero when Enabled is true.
	// defaultConfig sets it to 20.
	BatchSize int `yaml:"batch_size"`

	// MaxPerRun must be at least BatchSize when Enabled is true.
	// defaultConfig sets it to 100.
	MaxPerRun int `yaml:"max_per_run"`

	// IncludeArchived is not set by defaultConfig, so it starts as false.
	// NOTE(review): presumably widens the backfill to archived items — confirm.
	IncludeArchived bool `yaml:"include_archived"`
}

View File

@@ -95,6 +95,13 @@ func defaultConfig() Config {
Level: "info",
Format: "json",
},
Backfill: BackfillConfig{
Enabled: false,
RunOnStartup: false,
Interval: 15 * time.Minute,
BatchSize: 20,
MaxPerRun: 100,
},
}
}

View File

@@ -81,5 +81,14 @@ func (c Config) Validate() error {
return fmt.Errorf("invalid config: logging.level is required")
}
if c.Backfill.Enabled {
if c.Backfill.BatchSize <= 0 {
return fmt.Errorf("invalid config: backfill.batch_size must be greater than zero when backfill is enabled")
}
if c.Backfill.MaxPerRun < c.Backfill.BatchSize {
return fmt.Errorf("invalid config: backfill.max_per_run must be >= backfill.batch_size")
}
}
return nil
}