diff --git a/pkg/merge/merge.go b/pkg/merge/merge.go index 0ed3186..376eb7f 100644 --- a/pkg/merge/merge.go +++ b/pkg/merge/merge.go @@ -5,9 +5,11 @@ package merge import ( "fmt" + "strconv" "strings" "git.warky.dev/wdevs/relspecgo/pkg/models" + "git.warky.dev/wdevs/relspecgo/pkg/pgsql" ) // MergeResult represents the result of a merge operation @@ -449,10 +451,40 @@ func columnTypeConflict(target, source *models.Column) bool { return false } - return normalizeType(target.Type) != normalizeType(source.Type) || - target.Length != source.Length || - target.Precision != source.Precision || - target.Scale != source.Scale + tType, tLen, tPrec, tScale := extractTypeParts(target) + sType, sLen, sPrec, sScale := extractTypeParts(source) + + return tType != sType || tLen != sLen || tPrec != sPrec || tScale != sScale +} + +// extractTypeParts returns the canonical base type and dimensions for a column, +// handling the case where dimensions are embedded in the type string (e.g. "char(2)") +// rather than stored in the separate Length/Precision/Scale fields. 
+func extractTypeParts(col *models.Column) (baseType string, length, precision, scale int) {
+	typeName := strings.ToLower(strings.TrimSpace(col.Type))
+	length, precision, scale = col.Length, col.Precision, col.Scale
+
+	if idx := strings.Index(typeName, "("); idx >= 0 {
+		inner, rest := typeName[idx+1:], "" // rest: text after ")", e.g. " with time zone" or "[]"
+		if end := strings.Index(inner, ")"); end >= 0 {
+			inner, rest = inner[:end], inner[end+1:]
+		}
+		typeName = strings.Join(strings.Fields(typeName[:idx]+rest), " ") // base name without "(...)"
+		switch parts := strings.Split(inner, ","); len(parts) {
+		case 2: // "(precision,scale)"; explicit column fields take priority
+			if p, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && p > 0 && precision == 0 {
+				precision = p
+			}
+			if s, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil && s > 0 && scale == 0 {
+				scale = s
+			}
+		case 1: // "(length)"; ignored when length or precision is already set
+			if l, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && l > 0 && length == 0 && precision == 0 {
+				length = l
+			}
+		}
+	}
+	return pgsql.NormalizePGType(typeName), length, precision, scale
 }
 
 func normalizeType(value string) string {
diff --git a/pkg/pgsql/datatypes.go b/pkg/pgsql/datatypes.go
index ebe163f..02e857d 100644
--- a/pkg/pgsql/datatypes.go
+++ b/pkg/pgsql/datatypes.go
@@ -45,6 +45,7 @@ var GoToStdTypes = map[string]string{
 	"sqldate":      "date",
 	"sqltime":      "time",
 	"sqltimestamp": "timestamp",
+	"time.Time":    "timestamp",
 }
 
 var GoToPGSQLTypes = map[string]string{
@@ -90,6 +91,7 @@ var GoToPGSQLTypes = map[string]string{
 	"sqldate":      "date",
 	"sqltime":      "time",
 	"sqltimestamp": "timestamp",
+	"time.Time":    "timestamp",
 	"citext":       "citext",
 }
 
@@ -135,6 +137,62 @@ func ConvertSQLType(anytype string) string {
 	return anytype
 }
 
+// PGTypeCanonical maps PostgreSQL type aliases and synonyms to their canonical base name.
+// Input should be a base type (no dimension parameters, lowercase).
+var PGTypeCanonical = map[string]string{ + // integer aliases + "int": "integer", + "int4": "integer", + "int2": "smallint", + "int8": "bigint", + // float aliases + "float4": "real", + "float8": "double precision", + // bool alias + "bool": "boolean", + // char aliases + "character": "char", + "character varying": "varchar", + "bpchar": "char", + // timestamp aliases + "timestamp without time zone": "timestamp", + "timestamp with time zone": "timestamptz", + // time aliases + "time without time zone": "time", + "time with time zone": "timetz", + // decimal alias + "decimal": "numeric", +} + +// knownPGBaseTypes is the set of canonical PostgreSQL base types (no aliases). +var knownPGBaseTypes = map[string]struct{}{ + "integer": {}, "bigint": {}, "smallint": {}, + "serial": {}, "bigserial": {}, "smallserial": {}, + "numeric": {}, "real": {}, "double precision": {}, "money": {}, + "varchar": {}, "char": {}, "text": {}, "citext": {}, + "boolean": {}, + "date": {}, "time": {}, "timetz": {}, "timestamp": {}, "timestamptz": {}, "interval": {}, + "uuid": {}, "json": {}, "jsonb": {}, "bytea": {}, + "inet": {}, "cidr": {}, "macaddr": {}, "xml": {}, +} + +// NormalizePGType maps a PostgreSQL base type (no dimension parameters) to its +// canonical form. Unknown types are returned as-is (lowercased). +func NormalizePGType(baseType string) string { + lower := strings.ToLower(strings.TrimSpace(baseType)) + if canonical, ok := PGTypeCanonical[lower]; ok { + return canonical + } + return lower +} + +// IsKnownPGBaseType reports whether the given name (after NormalizePGType) is a +// recognized built-in PostgreSQL type. Custom types (e.g. vector, postgis) return false. 
+func IsKnownPGBaseType(baseType string) bool {
+	_, ok := knownPGBaseTypes[NormalizePGType(baseType)] // canonicalize first so aliases such as "int4" are recognized
+	return ok
+}
+
 func IsGoType(pTypeName string) bool {
 	for k := range GoToStdTypes {
 		if strings.EqualFold(pTypeName, k) {
diff --git a/pkg/readers/pgsql/reader.go b/pkg/readers/pgsql/reader.go
index 04a00ca..5c14966 100644
--- a/pkg/readers/pgsql/reader.go
+++ b/pkg/readers/pgsql/reader.go
@@ -259,12 +259,14 @@ func (r *Reader) close() {
 	}
 }
 
-// mapDataType maps PostgreSQL data types while preserving exact type text when available.
+// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
+// For known built-in types, dimensions are stripped from the type string (they are
+// stored separately in column.Length/Precision/Scale). For custom types (e.g.
+// vector(1536), postgis geometries), the full formatted type is preserved.
 func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
 	normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))
 
-	// If the column has a nextval default, it's likely a serial type
-	// Map to the appropriate serial type instead of the base integer type
+	// Detect serial types from nextval defaults before anything else.
 	if hasNextval {
 		switch normalizedPGType {
 		case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		}
 	}
 
-	// Prefer the database-provided formatted type; this preserves arrays/custom
-	// types/modifiers like text[], vector(1536), numeric(10,2), etc.
-	if strings.TrimSpace(formattedType) != "" {
-		return formattedType
-	}
-
 	// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 { return udtName[1:] + "[]" } - // Map common PostgreSQL types - typeMap := map[string]string{ - "integer": "integer", - "bigint": "bigint", - "smallint": "smallint", - "int": "integer", - "int2": "smallint", - "int4": "integer", - "int8": "bigint", - "serial": "serial", - "bigserial": "bigserial", - "smallserial": "smallserial", - "numeric": "numeric", - "decimal": "decimal", - "real": "real", - "double precision": "double precision", - "float4": "real", - "float8": "double precision", - "money": "money", - "character varying": "varchar", - "varchar": "varchar", - "character": "char", - "char": "char", - "text": "text", - "boolean": "boolean", - "bool": "boolean", - "date": "date", - "time": "time", - "time without time zone": "time", - "time with time zone": "timetz", - "timestamp": "timestamp", - "timestamp without time zone": "timestamp", - "timestamp with time zone": "timestamptz", - "timestamptz": "timestamptz", - "interval": "interval", - "uuid": "uuid", - "json": "json", - "jsonb": "jsonb", - "bytea": "bytea", - "inet": "inet", - "cidr": "cidr", - "macaddr": "macaddr", - "xml": "xml", + // Use the database-formatted type when available. For known built-in types, strip + // embedded dimensions (they are stored in column.Length/Precision/Scale separately). + // For unknown/custom types, keep the full formatted string (e.g. vector(1536)). 
+	if strings.TrimSpace(formattedType) != "" {
+		lower := strings.ToLower(strings.TrimSpace(formattedType))
+		isArray := strings.HasSuffix(lower, "[]")
+		base := strings.TrimSuffix(lower, "[]")
+		if open, end := strings.Index(base, "("), strings.LastIndex(base, ")"); open >= 0 && end > open {
+			base = strings.Join(strings.Fields(base[:open]+base[end+1:]), " ") // drop only "(...)"; keeps e.g. " with time zone"
+		}
+		canonical := pgsql.NormalizePGType(base)
+		if pgsql.IsKnownPGBaseType(canonical) {
+			if isArray {
+				return canonical + "[]"
+			}
+			return canonical
+		}
+		return formattedType
 	}
 
-	// Try mapped type first
-	if mapped, exists := typeMap[normalizedPGType]; exists {
-		return mapped
+	// Fall back to normalizing the information_schema type name directly.
+	canonical := pgsql.NormalizePGType(normalizedPGType)
+	if pgsql.IsKnownPGBaseType(canonical) {
+		return canonical
 	}
 
-	// Use pgsql utilities if available
-	if pgsql.ValidSQLType(pgType) {
-		return pgsql.GetSQLType(pgType)
-	}
-
-	// Return UDT name for custom types (including array fallback when needed)
+	// Return UDT name for custom types.
 	if udtName != "" {
 		if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 			return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		return udtName
 	}
 
-	// Default to the original type
 	return pgType
 }
 
diff --git a/pkg/readers/pgsql/reader_test.go b/pkg/readers/pgsql/reader_test.go
index e7eb09c..022a892 100644
--- a/pkg/readers/pgsql/reader_test.go
+++ b/pkg/readers/pgsql/reader_test.go
@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
 		{"unknown_type", "custom", "", "custom"}, // Should return UDT name
 		{"ARRAY", "_text", "", "text[]"},
 		{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
-		{"character varying", "varchar", "character varying(255)", "character varying(255)"},
+		{"character varying", "varchar", "character varying(255)", "varchar"},
 	}
 
 	for _, tt := range tests {
diff --git a/pkg/writers/pgsql/template_functions.go b/pkg/writers/pgsql/template_functions.go
index a6b0638..ae54b0a 100644
--- 
a/pkg/writers/pgsql/template_functions.go +++ b/pkg/writers/pgsql/template_functions.go @@ -5,6 +5,8 @@ import ( "regexp" "strings" "unicode" + + "git.warky.dev/wdevs/relspecgo/pkg/pgsql" ) // TemplateFunctions returns a map of custom template functions @@ -162,24 +164,12 @@ func quoteIdent(s string) string { // Type conversion functions -// goTypeToSQL converts Go type to PostgreSQL type +// goTypeToSQL converts Go type to PostgreSQL type using the shared pgsql type map. func goTypeToSQL(goType string) string { - typeMap := map[string]string{ - "string": "text", - "int": "integer", - "int32": "integer", - "int64": "bigint", - "float32": "real", - "float64": "double precision", - "bool": "boolean", - "time.Time": "timestamp", - "[]byte": "bytea", - } - - if sqlType, ok := typeMap[goType]; ok { + if sqlType, ok := pgsql.GoToPGSQLTypes[goType]; ok { return sqlType } - return "text" // Default + return "text" } // sqlTypeToGo converts PostgreSQL type to Go type diff --git a/pkg/writers/sqlite/datatypes.go b/pkg/writers/sqlite/datatypes.go index b3dd60f..bed217d 100644 --- a/pkg/writers/sqlite/datatypes.go +++ b/pkg/writers/sqlite/datatypes.go @@ -2,6 +2,8 @@ package sqlite import ( "strings" + + "git.warky.dev/wdevs/relspecgo/pkg/pgsql" ) // SQLite type affinities @@ -15,45 +17,29 @@ const ( // MapPostgreSQLType maps PostgreSQL data types to SQLite type affinities func MapPostgreSQLType(pgType string) string { - // Normalize the type normalized := strings.ToLower(strings.TrimSpace(pgType)) - - // Remove array notation if present normalized = strings.TrimSuffix(normalized, "[]") - - // Remove precision/scale if present if idx := strings.Index(normalized, "("); idx != -1 { normalized = normalized[:idx] } + // Resolve synonyms to canonical form before mapping + normalized = pgsql.NormalizePGType(normalized) - // Map to SQLite type affinity switch normalized { - // TEXT affinity - case "varchar", "character varying", "text", "char", "character", - "citext", "uuid", 
"timestamp", "timestamptz", "timestamp with time zone", - "timestamp without time zone", "date", "time", "timetz", "time with time zone", - "time without time zone", "json", "jsonb", "xml", "inet", "cidr", "macaddr": + case "varchar", "char", "text", "citext", "uuid", + "timestamp", "timestamptz", "date", "time", "timetz", + "json", "jsonb", "xml", "inet", "cidr", "macaddr": return TypeText - - // INTEGER affinity - case "int", "int2", "int4", "int8", "integer", "smallint", "bigint", - "serial", "smallserial", "bigserial", "boolean", "bool": + case "integer", "smallint", "bigint", + "serial", "smallserial", "bigserial", "boolean": return TypeInteger - - // REAL affinity - case "real", "float", "float4", "float8", "double precision": + case "real", "float", "double precision": return TypeReal - - // NUMERIC affinity - case "numeric", "decimal", "money": + case "numeric", "money": return TypeNumeric - - // BLOB affinity case "bytea", "blob": return TypeBlob - default: - // Default to TEXT for unknown types return TypeText } } @@ -65,10 +51,9 @@ func IsIntegerType(colType string) bool { if idx := strings.Index(normalized, "("); idx != -1 { normalized = normalized[:idx] } - + normalized = pgsql.NormalizePGType(normalized) switch normalized { - case "int", "int2", "int4", "int8", "integer", "smallint", "bigint", - "serial", "smallserial", "bigserial": + case "integer", "smallint", "bigint", "serial", "smallserial", "bigserial": return true default: return false