Files
amcs/internal/metadata/normalize.go

190 lines
4.7 KiB
Go

package metadata
import (
"sort"
"strings"
"git.warky.dev/wdevs/amcs/internal/config"
thoughttypes "git.warky.dev/wdevs/amcs/internal/types"
)
// Defaults and limits applied while normalizing thought metadata.
const (
// DefaultType is the thought type used when the supplied type is blank or is
// not one of the allowed types.
DefaultType = "observation"
// DefaultTopicFallback is the topic used when the capture configuration does
// not provide a non-blank topic fallback.
DefaultTopicFallback = "uncategorized"
// maxTopics is the maximum number of topics Normalize keeps.
maxTopics = 10
)
// allowedTypes is the set of thought types accepted by normalizeType. Lookups
// are done on the lower-cased, trimmed value; anything not listed here is
// replaced by DefaultType.
var allowedTypes = map[string]struct{}{
"observation": {},
"task": {},
"idea": {},
"reference": {},
"person_note": {},
}
// Fallback returns the metadata used when nothing better is available.
//
// The single topic is the trimmed capture.MetadataDefaults.TopicFallback, or
// DefaultTopicFallback when that is blank. Type and Source are derived from the
// capture configuration via normalizeType and normalizeSource. Every list field
// is set to a non-nil, empty slice.
func Fallback(capture config.CaptureConfig) thoughttypes.ThoughtMetadata {
	topic := DefaultTopicFallback
	if configured := strings.TrimSpace(capture.MetadataDefaults.TopicFallback); configured != "" {
		topic = configured
	}

	var meta thoughttypes.ThoughtMetadata
	meta.People = []string{}
	meta.ActionItems = []string{}
	meta.DatesMentioned = []string{}
	meta.Topics = []string{topic}
	meta.Type = normalizeType(capture.MetadataDefaults.Type)
	meta.Source = normalizeSource(capture.Source)
	meta.Attachments = []thoughttypes.ThoughtAttachment{}
	return meta
}
// Normalize returns a cleaned copy of in.
//
// List fields are whitespace-collapsed and de-duplicated by normalizeList, with
// Topics capped at maxTopics. Attachments are cleaned by normalizeAttachments.
//
// Capture defaults are applied where the input leaves a gap:
//   - no topics survive normalization: the fallback topic from Fallback(capture)
//   - blank Type: capture.MetadataDefaults.Type
//   - blank Source: capture.Source
//
// The chosen Type and Source still pass through normalizeType and
// normalizeSource, so an unknown type ends up as DefaultType and a blank
// configured source ends up as config.DefaultSource.
func Normalize(in thoughttypes.ThoughtMetadata, capture config.CaptureConfig) thoughttypes.ThoughtMetadata {
	// The defaults must be substituted before normalizing: normalizeType never
	// returns "" (and normalizeSource only does if config.DefaultSource is
	// itself empty), so checking the normalized value for emptiness afterwards
	// would not apply the capture configuration.
	rawType := in.Type
	if strings.TrimSpace(rawType) == "" {
		rawType = capture.MetadataDefaults.Type
	}
	rawSource := in.Source
	if strings.TrimSpace(rawSource) == "" {
		rawSource = capture.Source
	}

	out := thoughttypes.ThoughtMetadata{
		People:         normalizeList(in.People, 0),
		ActionItems:    normalizeList(in.ActionItems, 0),
		DatesMentioned: normalizeList(in.DatesMentioned, 0),
		Topics:         normalizeList(in.Topics, maxTopics),
		Type:           normalizeType(rawType),
		Source:         normalizeSource(rawSource),
		Attachments:    normalizeAttachments(in.Attachments),
	}
	if len(out.Topics) == 0 {
		out.Topics = Fallback(capture).Topics
	}
	return out
}
// normalizeList cleans a list of free-form strings.
//
// Each entry has leading/trailing whitespace removed and internal whitespace
// runs collapsed to a single space. Blank entries are dropped. Duplicates are
// detected case-insensitively and only the first spelling is kept. Input order
// is preserved. When limit is greater than zero, at most limit entries are
// returned; a limit of zero or less means no cap.
//
// The result is always non-nil, even for nil or empty input.
func normalizeList(values []string, limit int) []string {
	cleaned := make([]string, 0, len(values))
	known := make(map[string]struct{}, len(values))

	for _, raw := range values {
		// strings.Fields already ignores leading and trailing whitespace, so
		// joining its output both trims and collapses in one step.
		entry := strings.Join(strings.Fields(raw), " ")
		if entry == "" {
			continue
		}

		folded := strings.ToLower(entry)
		if _, duplicate := known[folded]; duplicate {
			continue
		}
		known[folded] = struct{}{}

		cleaned = append(cleaned, entry)
		if limit > 0 && len(cleaned) >= limit {
			return cleaned
		}
	}
	return cleaned
}
func normalizeType(value string) string {
normalized := strings.ToLower(strings.TrimSpace(value))
if normalized == "" {
return DefaultType
}
if _, ok := allowedTypes[normalized]; ok {
return normalized
}
return DefaultType
}
// normalizeSource returns value with surrounding whitespace removed, or
// config.DefaultSource when nothing remains after trimming.
func normalizeSource(value string) string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return trimmed
	}
	return config.DefaultSource
}
// Merge overlays patch on top of base and returns the normalized result.
//
// For each list field (People, ActionItems, DatesMentioned, Topics,
// Attachments), a non-empty patch list is appended after the base list; an
// empty patch list leaves the base list as is. Type and Source are replaced
// only when the patch value is non-blank. The combined metadata is then passed
// through Normalize with the given capture configuration.
//
// Neither base's nor patch's slices are modified: appended lists are built in
// freshly allocated slices.
func Merge(base, patch thoughttypes.ThoughtMetadata, capture config.CaptureConfig) thoughttypes.ThoughtMetadata {
	// concat copies first and second into a new slice so the caller's backing
	// arrays are never written to.
	concat := func(first, second []string) []string {
		joined := make([]string, 0, len(first)+len(second))
		joined = append(joined, first...)
		return append(joined, second...)
	}

	combined := base

	if len(patch.People) != 0 {
		combined.People = concat(base.People, patch.People)
	}
	if len(patch.ActionItems) != 0 {
		combined.ActionItems = concat(base.ActionItems, patch.ActionItems)
	}
	if len(patch.DatesMentioned) != 0 {
		combined.DatesMentioned = concat(base.DatesMentioned, patch.DatesMentioned)
	}
	if len(patch.Topics) != 0 {
		combined.Topics = concat(base.Topics, patch.Topics)
	}
	if len(patch.Attachments) != 0 {
		attachments := make([]thoughttypes.ThoughtAttachment, 0, len(base.Attachments)+len(patch.Attachments))
		attachments = append(attachments, base.Attachments...)
		combined.Attachments = append(attachments, patch.Attachments...)
	}

	// Scalar fields: a blank patch value means "keep the base value".
	if strings.TrimSpace(patch.Type) != "" {
		combined.Type = patch.Type
	}
	if strings.TrimSpace(patch.Source) != "" {
		combined.Source = patch.Source
	}

	return Normalize(combined, capture)
}
// normalizeAttachments cleans a list of attachments.
//
// Attachments whose FileID renders as the empty string or as the all-zero UUID
// are dropped, as are attachments whose FileID has already been seen (the first
// occurrence wins). For the ones kept, Name, MediaType, Kind and SHA256 are
// trimmed and a negative SizeBytes is clamped to zero. Input order is
// preserved and the result is always non-nil.
func normalizeAttachments(values []thoughttypes.ThoughtAttachment) []thoughttypes.ThoughtAttachment {
	const zeroFileID = "00000000-0000-0000-0000-000000000000"

	kept := make([]thoughttypes.ThoughtAttachment, 0, len(values))
	visited := make(map[string]struct{}, len(values))

	for _, attachment := range values {
		id := attachment.FileID.String()
		if id == "" || id == zeroFileID {
			continue
		}
		if _, duplicate := visited[id]; duplicate {
			continue
		}
		visited[id] = struct{}{}

		// attachment is a per-iteration copy, so these edits do not touch the
		// caller's slice elements.
		attachment.Name = strings.TrimSpace(attachment.Name)
		attachment.MediaType = strings.TrimSpace(attachment.MediaType)
		attachment.Kind = strings.TrimSpace(attachment.Kind)
		attachment.SHA256 = strings.TrimSpace(attachment.SHA256)
		if attachment.SizeBytes < 0 {
			attachment.SizeBytes = 0
		}

		kept = append(kept, attachment)
	}
	return kept
}
// SortedTopCounts converts a key-to-count map into a slice ordered by
// descending count, with ties broken by ascending key.
//
// Keys that are blank after trimming are skipped; the keys that are kept are
// stored unmodified. When limit is greater than zero and more than limit
// entries remain, only the first limit entries are returned; a limit of zero
// or less returns everything. The result is always non-nil.
func SortedTopCounts(in map[string]int, limit int) []thoughttypes.KeyCount {
	ranked := make([]thoughttypes.KeyCount, 0, len(in))
	for name, total := range in {
		if strings.TrimSpace(name) != "" {
			ranked = append(ranked, thoughttypes.KeyCount{Key: name, Count: total})
		}
	}

	sort.Slice(ranked, func(a, b int) bool {
		if ranked[a].Count != ranked[b].Count {
			return ranked[a].Count > ranked[b].Count
		}
		return ranked[a].Key < ranked[b].Key
	})

	if limit <= 0 || len(ranked) <= limit {
		return ranked
	}
	return ranked[:limit]
}