feat(merge): enhance type conflict detection for columns

* Introduced extractTypeParts function to handle embedded dimensions in type strings.
* Updated columnTypeConflict to utilize new type extraction logic.
* Improved PostgreSQL type normalization and handling in various components.
This commit is contained in:
2026-05-19 19:12:27 +02:00
parent 9235ef5e08
commit 9190df81dd
6 changed files with 141 additions and 110 deletions
+36 -4
View File
@@ -5,9 +5,11 @@ package merge
import ( import (
"fmt" "fmt"
"strconv"
"strings" "strings"
"git.warky.dev/wdevs/relspecgo/pkg/models" "git.warky.dev/wdevs/relspecgo/pkg/models"
"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
) )
// MergeResult represents the result of a merge operation // MergeResult represents the result of a merge operation
@@ -449,10 +451,40 @@ func columnTypeConflict(target, source *models.Column) bool {
return false return false
} }
return normalizeType(target.Type) != normalizeType(source.Type) || tType, tLen, tPrec, tScale := extractTypeParts(target)
target.Length != source.Length || sType, sLen, sPrec, sScale := extractTypeParts(source)
target.Precision != source.Precision ||
target.Scale != source.Scale return tType != sType || tLen != sLen || tPrec != sPrec || tScale != sScale
}
// extractTypeParts returns the canonical base type and dimensions for a column,
// handling the case where dimensions are embedded in the type string (e.g. "char(2)")
// rather than stored in the separate Length/Precision/Scale fields.
func extractTypeParts(col *models.Column) (baseType string, length, precision, scale int) {
typeName := strings.ToLower(strings.TrimSpace(col.Type))
length, precision, scale = col.Length, col.Precision, col.Scale
if idx := strings.Index(typeName, "("); idx >= 0 {
inner := strings.TrimRight(strings.TrimSpace(typeName[idx+1:]), ")")
typeName = strings.TrimSpace(typeName[:idx])
parts := strings.Split(inner, ",")
if len(parts) == 2 {
if p, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && p > 0 && precision == 0 {
precision = p
}
if s, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil && s > 0 && scale == 0 {
scale = s
}
} else if len(parts) == 1 {
if l, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && l > 0 && length == 0 && precision == 0 {
length = l
}
}
}
typeName = pgsql.NormalizePGType(typeName)
return typeName, length, precision, scale
} }
func normalizeType(value string) string { func normalizeType(value string) string {
+58
View File
@@ -45,6 +45,7 @@ var GoToStdTypes = map[string]string{
"sqldate": "date", "sqldate": "date",
"sqltime": "time", "sqltime": "time",
"sqltimestamp": "timestamp", "sqltimestamp": "timestamp",
"time.Time": "timestamp",
} }
var GoToPGSQLTypes = map[string]string{ var GoToPGSQLTypes = map[string]string{
@@ -90,6 +91,7 @@ var GoToPGSQLTypes = map[string]string{
"sqldate": "date", "sqldate": "date",
"sqltime": "time", "sqltime": "time",
"sqltimestamp": "timestamp", "sqltimestamp": "timestamp",
"time.Time": "timestamp",
"citext": "citext", "citext": "citext",
} }
@@ -135,6 +137,62 @@ func ConvertSQLType(anytype string) string {
return anytype return anytype
} }
// PGTypeCanonical maps PostgreSQL type aliases and synonyms to their canonical base name.
// Input should be a base type (no dimension parameters, lowercase).
// Names that are already canonical are intentionally absent; a failed lookup
// means "use the name unchanged".
var PGTypeCanonical = map[string]string{
	// integer aliases
	"int":  "integer",
	"int4": "integer",
	"int2": "smallint",
	"int8": "bigint",

	// serial aliases (auto-incrementing integer pseudo-types)
	"serial2": "smallserial",
	"serial4": "serial",
	"serial8": "bigserial",

	// float aliases
	"float4": "real",
	"float8": "double precision",

	// bool alias
	"bool": "boolean",

	// char aliases
	"character":         "char",
	"character varying": "varchar",
	"bpchar":            "char",

	// timestamp aliases
	"timestamp without time zone": "timestamp",
	"timestamp with time zone":    "timestamptz",

	// time aliases
	"time without time zone": "time",
	"time with time zone":    "timetz",

	// decimal alias
	"decimal": "numeric",
}
// knownPGBaseTypes is a set of built-in PostgreSQL type names in canonical
// spelling. Aliases (int4, bool, ...) are deliberately not members; only the
// name each alias resolves to is listed.
var knownPGBaseTypes = map[string]struct{}{
	// whole numbers
	"integer":  {},
	"bigint":   {},
	"smallint": {},

	// auto-incrementing whole numbers
	"serial":      {},
	"bigserial":   {},
	"smallserial": {},

	// exact, floating-point and monetary numbers
	"numeric":          {},
	"real":             {},
	"double precision": {},
	"money":            {},

	// character data
	"varchar": {},
	"char":    {},
	"text":    {},
	"citext":  {},

	// logical
	"boolean": {},

	// date and time
	"date":        {},
	"time":        {},
	"timetz":      {},
	"timestamp":   {},
	"timestamptz": {},
	"interval":    {},

	// identifiers, documents and raw bytes
	"uuid":  {},
	"json":  {},
	"jsonb": {},
	"bytea": {},

	// network addresses and XML
	"inet":    {},
	"cidr":    {},
	"macaddr": {},
	"xml":     {},
}
// NormalizePGType resolves a PostgreSQL base type name (without dimension
// parameters) to its canonical spelling. Surrounding whitespace is trimmed and
// the name is lowercased before the PGTypeCanonical lookup; a name with no
// entry there is returned in that trimmed, lowercased form.
func NormalizePGType(baseType string) string {
	key := strings.TrimSpace(baseType)
	key = strings.ToLower(key)

	canonical, found := PGTypeCanonical[key]
	if !found {
		return key
	}
	return canonical
}
// IsKnownPGBaseType reports whether baseType names a recognized built-in
// PostgreSQL type. The name is trimmed and lowercased, then looked up in
// knownPGBaseTypes; aliases are not resolved here, so callers should pass the
// result of NormalizePGType. Names outside that set (e.g. vector, postgis
// types) yield false.
func IsKnownPGBaseType(baseType string) bool {
	name := strings.ToLower(strings.TrimSpace(baseType))
	if _, known := knownPGBaseTypes[name]; known {
		return true
	}
	return false
}
func IsGoType(pTypeName string) bool { func IsGoType(pTypeName string) bool {
for k := range GoToStdTypes { for k := range GoToStdTypes {
if strings.EqualFold(pTypeName, k) { if strings.EqualFold(pTypeName, k) {
+28 -62
View File
@@ -259,12 +259,14 @@ func (r *Reader) close() {
} }
} }
// mapDataType maps PostgreSQL data types while preserving exact type text when available. // mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
// For known built-in types, dimensions are stripped from the type string (they are
// stored separately in column.Length/Precision/Scale). For custom types (e.g.
// vector(1536), postgis geometries), the full formatted type is preserved.
func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string { func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
normalizedPGType := strings.ToLower(strings.TrimSpace(pgType)) normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))
// If the column has a nextval default, it's likely a serial type // Detect serial types from nextval defaults before anything else.
// Map to the appropriate serial type instead of the base integer type
if hasNextval { if hasNextval {
switch normalizedPGType { switch normalizedPGType {
case "integer", "int", "int4": case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
} }
} }
// Prefer the database-provided formatted type; this preserves arrays/custom
// types/modifiers like text[], vector(1536), numeric(10,2), etc.
if strings.TrimSpace(formattedType) != "" {
return formattedType
}
// information_schema reports arrays generically as "ARRAY" with udt_name like "_text". // information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 { if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
return udtName[1:] + "[]" return udtName[1:] + "[]"
} }
// Map common PostgreSQL types // Use the database-formatted type when available. For known built-in types, strip
typeMap := map[string]string{ // embedded dimensions (they are stored in column.Length/Precision/Scale separately).
"integer": "integer", // For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
"bigint": "bigint", if strings.TrimSpace(formattedType) != "" {
"smallint": "smallint", lower := strings.ToLower(strings.TrimSpace(formattedType))
"int": "integer", isArray := strings.HasSuffix(lower, "[]")
"int2": "smallint", base := strings.TrimSuffix(lower, "[]")
"int4": "integer", if idx := strings.Index(base, "("); idx >= 0 {
"int8": "bigint", base = strings.TrimSpace(base[:idx])
"serial": "serial", }
"bigserial": "bigserial", canonical := pgsql.NormalizePGType(base)
"smallserial": "smallserial", if pgsql.IsKnownPGBaseType(canonical) {
"numeric": "numeric", if isArray {
"decimal": "decimal", return canonical + "[]"
"real": "real", }
"double precision": "double precision", return canonical
"float4": "real", }
"float8": "double precision", return formattedType
"money": "money",
"character varying": "varchar",
"varchar": "varchar",
"character": "char",
"char": "char",
"text": "text",
"boolean": "boolean",
"bool": "boolean",
"date": "date",
"time": "time",
"time without time zone": "time",
"time with time zone": "timetz",
"timestamp": "timestamp",
"timestamp without time zone": "timestamp",
"timestamp with time zone": "timestamptz",
"timestamptz": "timestamptz",
"interval": "interval",
"uuid": "uuid",
"json": "json",
"jsonb": "jsonb",
"bytea": "bytea",
"inet": "inet",
"cidr": "cidr",
"macaddr": "macaddr",
"xml": "xml",
} }
// Try mapped type first // Fall back to normalizing the information_schema type name directly.
if mapped, exists := typeMap[normalizedPGType]; exists { canonical := pgsql.NormalizePGType(normalizedPGType)
return mapped if pgsql.IsKnownPGBaseType(canonical) {
return canonical
} }
// Use pgsql utilities if available // Return UDT name for custom types.
if pgsql.ValidSQLType(pgType) {
return pgsql.GetSQLType(pgType)
}
// Return UDT name for custom types (including array fallback when needed)
if udtName != "" { if udtName != "" {
if strings.HasPrefix(udtName, "_") && len(udtName) > 1 { if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
return udtName[1:] + "[]" return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
return udtName return udtName
} }
// Default to the original type
return pgType return pgType
} }
+1 -1
View File
@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
{"unknown_type", "custom", "", "custom"}, // Should return UDT name {"unknown_type", "custom", "", "custom"}, // Should return UDT name
{"ARRAY", "_text", "", "text[]"}, {"ARRAY", "_text", "", "text[]"},
{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"}, {"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
{"character varying", "varchar", "character varying(255)", "character varying(255)"}, {"character varying", "varchar", "character varying(255)", "varchar"},
} }
for _, tt := range tests { for _, tt := range tests {
+5 -15
View File
@@ -5,6 +5,8 @@ import (
"regexp" "regexp"
"strings" "strings"
"unicode" "unicode"
"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
) )
// TemplateFunctions returns a map of custom template functions // TemplateFunctions returns a map of custom template functions
@@ -162,24 +164,12 @@ func quoteIdent(s string) string {
// Type conversion functions // Type conversion functions
// goTypeToSQL converts Go type to PostgreSQL type // goTypeToSQL converts Go type to PostgreSQL type using the shared pgsql type map.
func goTypeToSQL(goType string) string { func goTypeToSQL(goType string) string {
typeMap := map[string]string{ if sqlType, ok := pgsql.GoToPGSQLTypes[goType]; ok {
"string": "text",
"int": "integer",
"int32": "integer",
"int64": "bigint",
"float32": "real",
"float64": "double precision",
"bool": "boolean",
"time.Time": "timestamp",
"[]byte": "bytea",
}
if sqlType, ok := typeMap[goType]; ok {
return sqlType return sqlType
} }
return "text" // Default return "text"
} }
// sqlTypeToGo converts PostgreSQL type to Go type // sqlTypeToGo converts PostgreSQL type to Go type
+13 -28
View File
@@ -2,6 +2,8 @@ package sqlite
import ( import (
"strings" "strings"
"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
) )
// SQLite type affinities // SQLite type affinities
@@ -15,45 +17,29 @@ const (
// MapPostgreSQLType maps PostgreSQL data types to SQLite type affinities // MapPostgreSQLType maps PostgreSQL data types to SQLite type affinities
func MapPostgreSQLType(pgType string) string { func MapPostgreSQLType(pgType string) string {
// Normalize the type
normalized := strings.ToLower(strings.TrimSpace(pgType)) normalized := strings.ToLower(strings.TrimSpace(pgType))
// Remove array notation if present
normalized = strings.TrimSuffix(normalized, "[]") normalized = strings.TrimSuffix(normalized, "[]")
// Remove precision/scale if present
if idx := strings.Index(normalized, "("); idx != -1 { if idx := strings.Index(normalized, "("); idx != -1 {
normalized = normalized[:idx] normalized = normalized[:idx]
} }
// Resolve synonyms to canonical form before mapping
normalized = pgsql.NormalizePGType(normalized)
// Map to SQLite type affinity
switch normalized { switch normalized {
// TEXT affinity case "varchar", "char", "text", "citext", "uuid",
case "varchar", "character varying", "text", "char", "character", "timestamp", "timestamptz", "date", "time", "timetz",
"citext", "uuid", "timestamp", "timestamptz", "timestamp with time zone", "json", "jsonb", "xml", "inet", "cidr", "macaddr":
"timestamp without time zone", "date", "time", "timetz", "time with time zone",
"time without time zone", "json", "jsonb", "xml", "inet", "cidr", "macaddr":
return TypeText return TypeText
case "integer", "smallint", "bigint",
// INTEGER affinity "serial", "smallserial", "bigserial", "boolean":
case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
"serial", "smallserial", "bigserial", "boolean", "bool":
return TypeInteger return TypeInteger
case "real", "float", "double precision":
// REAL affinity
case "real", "float", "float4", "float8", "double precision":
return TypeReal return TypeReal
case "numeric", "money":
// NUMERIC affinity
case "numeric", "decimal", "money":
return TypeNumeric return TypeNumeric
// BLOB affinity
case "bytea", "blob": case "bytea", "blob":
return TypeBlob return TypeBlob
default: default:
// Default to TEXT for unknown types
return TypeText return TypeText
} }
} }
@@ -65,10 +51,9 @@ func IsIntegerType(colType string) bool {
if idx := strings.Index(normalized, "("); idx != -1 { if idx := strings.Index(normalized, "("); idx != -1 {
normalized = normalized[:idx] normalized = normalized[:idx]
} }
normalized = pgsql.NormalizePGType(normalized)
switch normalized { switch normalized {
case "int", "int2", "int4", "int8", "integer", "smallint", "bigint", case "integer", "smallint", "bigint", "serial", "smallserial", "bigserial":
"serial", "smallserial", "bigserial":
return true return true
default: default:
return false return false