- Added file upload handler to process both multipart and raw file uploads.
- Implemented parsing logic for upload requests, including handling file metadata.
- Introduced SaveFileDecodedInput structure for handling decoded file uploads.
- Created unit tests for file upload parsing and validation.

feat: add metadata retry configuration and functionality
- Introduced MetadataRetryConfig to the application configuration.
- Implemented MetadataRetryer to handle retrying metadata extraction for thoughts.
- Added new tool for retrying failed metadata extractions.
- Updated thought metadata structure to include status and timestamps for metadata processing.

fix: enhance metadata normalization and error handling
- Updated metadata normalization functions to track status and errors.
- Improved handling of metadata extraction failures during thought updates and captures.
- Ensured that metadata status is correctly set during various operations.

refactor: streamline file saving logic in FilesTool
- Refactored Save method in FilesTool to utilize new SaveDecoded method.
- Simplified project and thought ID resolution logic during file saving.
250 lines
7.0 KiB
Go
250 lines
7.0 KiB
Go
package metadata
|
|
|
|
import (
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.warky.dev/wdevs/amcs/internal/config"
|
|
thoughttypes "git.warky.dev/wdevs/amcs/internal/types"
|
|
)
|
|
|
|
// Defaults applied when a thought carries no usable type or topic.
const (
	// DefaultType is the thought type normalizeType returns for a blank or
	// unrecognised type.
	DefaultType = "observation"
	// DefaultTopicFallback is the topic Fallback uses when the capture
	// configuration supplies no fallback topic.
	DefaultTopicFallback = "uncategorized"
)

// Values stored in the MetadataStatus field of a thought's metadata.
const (
	MetadataStatusComplete = "complete"
	MetadataStatusPending  = "pending"
	MetadataStatusFailed   = "failed"
)

// maxTopics is the limit Normalize passes to normalizeList for topics.
const maxTopics = 10
|
|
|
|
// allowedTypes is the set of thought types that normalizeType accepts
// as-is; keys are lower-case because lookups are done on lower-cased input.
var allowedTypes = map[string]struct{}{
	"idea":        {},
	"observation": {},
	"person_note": {},
	"reference":   {},
	"task":        {},
}
|
|
|
|
func Fallback(capture config.CaptureConfig) thoughttypes.ThoughtMetadata {
|
|
topicFallback := strings.TrimSpace(capture.MetadataDefaults.TopicFallback)
|
|
if topicFallback == "" {
|
|
topicFallback = DefaultTopicFallback
|
|
}
|
|
|
|
return thoughttypes.ThoughtMetadata{
|
|
People: []string{},
|
|
ActionItems: []string{},
|
|
DatesMentioned: []string{},
|
|
Topics: []string{topicFallback},
|
|
Type: normalizeType(capture.MetadataDefaults.Type),
|
|
Source: normalizeSource(capture.Source),
|
|
Attachments: []thoughttypes.ThoughtAttachment{},
|
|
MetadataStatus: MetadataStatusComplete,
|
|
}
|
|
}
|
|
|
|
func Normalize(in thoughttypes.ThoughtMetadata, capture config.CaptureConfig) thoughttypes.ThoughtMetadata {
|
|
out := thoughttypes.ThoughtMetadata{
|
|
People: normalizeList(in.People, 0),
|
|
ActionItems: normalizeList(in.ActionItems, 0),
|
|
DatesMentioned: normalizeList(in.DatesMentioned, 0),
|
|
Topics: normalizeList(in.Topics, maxTopics),
|
|
Type: normalizeType(in.Type),
|
|
Source: normalizeSource(in.Source),
|
|
Attachments: normalizeAttachments(in.Attachments),
|
|
MetadataStatus: normalizeMetadataStatus(in.MetadataStatus),
|
|
MetadataUpdatedAt: strings.TrimSpace(in.MetadataUpdatedAt),
|
|
MetadataLastAttemptedAt: strings.TrimSpace(in.MetadataLastAttemptedAt),
|
|
MetadataError: strings.TrimSpace(in.MetadataError),
|
|
}
|
|
|
|
if len(out.Topics) == 0 {
|
|
out.Topics = Fallback(capture).Topics
|
|
}
|
|
if out.Type == "" {
|
|
out.Type = Fallback(capture).Type
|
|
}
|
|
if out.Source == "" {
|
|
out.Source = Fallback(capture).Source
|
|
}
|
|
if out.MetadataStatus == "" {
|
|
out.MetadataStatus = MetadataStatusComplete
|
|
}
|
|
if out.MetadataStatus == MetadataStatusComplete {
|
|
out.MetadataError = ""
|
|
}
|
|
|
|
return out
|
|
}
|
|
|
|
func MarkMetadataPending(base thoughttypes.ThoughtMetadata, capture config.CaptureConfig, attempt time.Time, err error) thoughttypes.ThoughtMetadata {
|
|
out := Normalize(base, capture)
|
|
out.MetadataStatus = MetadataStatusPending
|
|
out.MetadataLastAttemptedAt = attempt.UTC().Format(time.RFC3339)
|
|
if err != nil {
|
|
out.MetadataError = strings.TrimSpace(err.Error())
|
|
}
|
|
out.MetadataUpdatedAt = strings.TrimSpace(base.MetadataUpdatedAt)
|
|
return out
|
|
}
|
|
|
|
func MarkMetadataFailed(base thoughttypes.ThoughtMetadata, capture config.CaptureConfig, attempt time.Time, err error) thoughttypes.ThoughtMetadata {
|
|
out := Normalize(base, capture)
|
|
out.MetadataStatus = MetadataStatusFailed
|
|
out.MetadataLastAttemptedAt = attempt.UTC().Format(time.RFC3339)
|
|
if err != nil {
|
|
out.MetadataError = strings.TrimSpace(err.Error())
|
|
}
|
|
out.MetadataUpdatedAt = strings.TrimSpace(base.MetadataUpdatedAt)
|
|
return out
|
|
}
|
|
|
|
func MarkMetadataComplete(base thoughttypes.ThoughtMetadata, capture config.CaptureConfig, updatedAt time.Time) thoughttypes.ThoughtMetadata {
|
|
out := Normalize(base, capture)
|
|
out.MetadataStatus = MetadataStatusComplete
|
|
timestamp := updatedAt.UTC().Format(time.RFC3339)
|
|
out.MetadataUpdatedAt = timestamp
|
|
out.MetadataLastAttemptedAt = timestamp
|
|
out.MetadataError = ""
|
|
return out
|
|
}
|
|
|
|
// normalizeList cleans a list of free-text values.
//
// Each value has surrounding whitespace removed and internal whitespace runs
// collapsed to a single space. Blank values are dropped, and duplicates are
// removed case-insensitively, keeping the first spelling seen. Input order is
// preserved. A positive limit caps the number of values returned; zero or a
// negative limit means no cap. The result is never nil.
func normalizeList(values []string, limit int) []string {
	kept := make([]string, 0, len(values))
	known := make(map[string]struct{}, len(values))

	for _, raw := range values {
		if limit > 0 && len(kept) >= limit {
			break
		}

		// strings.Fields already ignores leading and trailing whitespace.
		cleaned := strings.Join(strings.Fields(raw), " ")
		if cleaned == "" {
			continue
		}

		folded := strings.ToLower(cleaned)
		if _, dup := known[folded]; dup {
			continue
		}
		known[folded] = struct{}{}
		kept = append(kept, cleaned)
	}

	return kept
}
|
|
|
|
func normalizeType(value string) string {
|
|
normalized := strings.ToLower(strings.TrimSpace(value))
|
|
if normalized == "" {
|
|
return DefaultType
|
|
}
|
|
if _, ok := allowedTypes[normalized]; ok {
|
|
return normalized
|
|
}
|
|
return DefaultType
|
|
}
|
|
|
|
func normalizeMetadataStatus(value string) string {
|
|
switch strings.ToLower(strings.TrimSpace(value)) {
|
|
case "", MetadataStatusComplete:
|
|
return MetadataStatusComplete
|
|
case MetadataStatusPending:
|
|
return MetadataStatusPending
|
|
case MetadataStatusFailed:
|
|
return MetadataStatusFailed
|
|
default:
|
|
return MetadataStatusComplete
|
|
}
|
|
}
|
|
|
|
func normalizeSource(value string) string {
|
|
normalized := strings.TrimSpace(value)
|
|
if normalized == "" {
|
|
return config.DefaultSource
|
|
}
|
|
return normalized
|
|
}
|
|
|
|
func Merge(base, patch thoughttypes.ThoughtMetadata, capture config.CaptureConfig) thoughttypes.ThoughtMetadata {
|
|
merged := base
|
|
|
|
if len(patch.People) > 0 {
|
|
merged.People = append(append([]string{}, merged.People...), patch.People...)
|
|
}
|
|
if len(patch.ActionItems) > 0 {
|
|
merged.ActionItems = append(append([]string{}, merged.ActionItems...), patch.ActionItems...)
|
|
}
|
|
if len(patch.DatesMentioned) > 0 {
|
|
merged.DatesMentioned = append(append([]string{}, merged.DatesMentioned...), patch.DatesMentioned...)
|
|
}
|
|
if len(patch.Topics) > 0 {
|
|
merged.Topics = append(append([]string{}, merged.Topics...), patch.Topics...)
|
|
}
|
|
if strings.TrimSpace(patch.Type) != "" {
|
|
merged.Type = patch.Type
|
|
}
|
|
if strings.TrimSpace(patch.Source) != "" {
|
|
merged.Source = patch.Source
|
|
}
|
|
if len(patch.Attachments) > 0 {
|
|
merged.Attachments = append(append([]thoughttypes.ThoughtAttachment{}, merged.Attachments...), patch.Attachments...)
|
|
}
|
|
|
|
return Normalize(merged, capture)
|
|
}
|
|
|
|
func normalizeAttachments(values []thoughttypes.ThoughtAttachment) []thoughttypes.ThoughtAttachment {
|
|
seen := make(map[string]struct{}, len(values))
|
|
result := make([]thoughttypes.ThoughtAttachment, 0, len(values))
|
|
|
|
for _, value := range values {
|
|
if value.FileID.String() == "" || value.FileID.String() == "00000000-0000-0000-0000-000000000000" {
|
|
continue
|
|
}
|
|
|
|
key := value.FileID.String()
|
|
if _, ok := seen[key]; ok {
|
|
continue
|
|
}
|
|
|
|
value.Name = strings.TrimSpace(value.Name)
|
|
value.MediaType = strings.TrimSpace(value.MediaType)
|
|
value.Kind = strings.TrimSpace(value.Kind)
|
|
if value.SizeBytes < 0 {
|
|
value.SizeBytes = 0
|
|
}
|
|
value.SHA256 = strings.TrimSpace(value.SHA256)
|
|
|
|
seen[key] = struct{}{}
|
|
result = append(result, value)
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func SortedTopCounts(in map[string]int, limit int) []thoughttypes.KeyCount {
|
|
out := make([]thoughttypes.KeyCount, 0, len(in))
|
|
for key, count := range in {
|
|
if strings.TrimSpace(key) == "" {
|
|
continue
|
|
}
|
|
out = append(out, thoughttypes.KeyCount{Key: key, Count: count})
|
|
}
|
|
|
|
sort.Slice(out, func(i, j int) bool {
|
|
if out[i].Count == out[j].Count {
|
|
return out[i].Key < out[j].Key
|
|
}
|
|
return out[i].Count > out[j].Count
|
|
})
|
|
|
|
if limit > 0 && len(out) > limit {
|
|
return out[:limit]
|
|
}
|
|
|
|
return out
|
|
}
|