feat(merge): enhance type conflict detection for columns

* Introduced the extractTypeParts function to handle dimensions embedded in type strings.
* Updated columnTypeConflict to use the new type-extraction logic.
* Improved PostgreSQL type normalization and handling in various components.
2026-05-19 19:12:27 +02:00
parent 9235ef5e08
commit 9190df81dd
6 changed files with 141 additions and 110 deletions
@@ -5,9 +5,11 @@ package merge
 import (
 	"fmt"
 	"strconv"
 	"strings"
 	"git.warky.dev/wdevs/relspecgo/pkg/models"
 	"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
 )
 // MergeResult represents the result of a merge operation
@@ -449,10 +451,40 @@ func columnTypeConflict(target, source *models.Column) bool {
 		return false
 	}
-	return normalizeType(target.Type) != normalizeType(source.Type) ||
+	tType, tLen, tPrec, tScale := extractTypeParts(target)
-		target.Length != source.Length ||
+	sType, sLen, sPrec, sScale := extractTypeParts(source)
-		target.Precision != source.Precision ||
+
-		target.Scale != source.Scale
+	return tType != sType || tLen != sLen || tPrec != sPrec || tScale != sScale
 }
 // extractTypeParts returns the canonical base type and dimensions for a column,
 // handling the case where dimensions are embedded in the type string (e.g. "char(2)")
 // rather than stored in the separate Length/Precision/Scale fields.
 //
 // The column's own Length/Precision/Scale values always win; an embedded dimension
 // is only used to fill a field that is still zero. A two-element list "(p,s)" fills
 // precision and scale; a single element "(n)" fills length, and only when neither
 // length nor precision is already set.
 //
 // Anything that follows the dimension list is kept as part of the type name, so
 // "timestamp(6) with time zone" is normalized from "timestamp with time zone" and
 // not from a bare "timestamp". Trailing "[]" array markers are removed before
 // normalization and re-appended to the result, so "int4[]" and "integer[]" yield
 // the same base type while remaining distinct from the scalar "integer".
 func extractTypeParts(col *models.Column) (baseType string, length, precision, scale int) {
 	typeName := strings.ToLower(strings.TrimSpace(col.Type))
 	length, precision, scale = col.Length, col.Precision, col.Scale
 	// Array markers sit after the dimension list ("varchar(10)[]"); peel them off
 	// first so they neither break dimension parsing nor defeat alias lookup.
 	arraySuffix := ""
 	for strings.HasSuffix(typeName, "[]") {
 		typeName = strings.TrimSpace(strings.TrimSuffix(typeName, "[]"))
 		arraySuffix += "[]"
 	}
 	if open := strings.Index(typeName, "("); open >= 0 {
 		inner := typeName[open+1:]
 		trailing := ""
 		// Split at the first ")" so the qualifier that follows the dimensions
 		// (e.g. "with time zone") is preserved. A missing ")" is tolerated and
 		// stray extra ")" characters are dropped.
 		if end := strings.Index(inner, ")"); end >= 0 {
 			trailing = strings.Trim(inner[end+1:], ") \t")
 			inner = inner[:end]
 		}
 		typeName = strings.TrimSpace(typeName[:open])
 		if trailing != "" {
 			typeName += " " + trailing
 		}
 		switch parts := strings.Split(inner, ","); len(parts) {
 		case 2:
 			if p, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && p > 0 && precision == 0 {
 				precision = p
 			}
 			if s, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil && s > 0 && scale == 0 {
 				scale = s
 			}
 		case 1:
 			if l, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && l > 0 && length == 0 && precision == 0 {
 				length = l
 			}
 		}
 	}
 	return pgsql.NormalizePGType(typeName) + arraySuffix, length, precision, scale
 }
 func normalizeType(value string) string {
@@ -45,6 +45,7 @@ var GoToStdTypes = map[string]string{
 	"sqldate":         "date",
 	"sqltime":         "time",
 	"sqltimestamp":    "timestamp",
 	"time.Time":       "timestamp",
 }
 var GoToPGSQLTypes = map[string]string{
@@ -90,6 +91,7 @@ var GoToPGSQLTypes = map[string]string{
 	"sqldate":         "date",
 	"sqltime":         "time",
 	"sqltimestamp":    "timestamp",
 	"time.Time":       "timestamp",
 	"citext":          "citext",
 }
@@ -135,6 +137,62 @@ func ConvertSQLType(anytype string) string {
 	return anytype
 }
 // PGTypeCanonical maps PostgreSQL type aliases and synonyms to their canonical base name.
 // Input should be a base type (no dimension parameters, lowercase).
 // No canonical name appears as a key, so applying the mapping twice gives the
 // same result as applying it once.
 var PGTypeCanonical = map[string]string{
 	// integer aliases
 	"int":  "integer",
 	"int4": "integer",
 	"int2": "smallint",
 	"int8": "bigint",
 	// serial aliases (auto-incrementing integer pseudo-types)
 	"serial2": "smallserial",
 	"serial4": "serial",
 	"serial8": "bigserial",
 	// float aliases
 	"float4": "real",
 	"float8": "double precision",
 	// bool alias
 	"bool": "boolean",
 	// char aliases
 	"character":         "char",
 	"character varying": "varchar",
 	"bpchar":            "char",
 	// timestamp aliases
 	"timestamp without time zone": "timestamp",
 	"timestamp with time zone":    "timestamptz",
 	// time aliases
 	"time without time zone": "time",
 	"time with time zone":    "timetz",
 	// decimal alias
 	"decimal": "numeric",
 }
 // knownPGBaseTypes holds the canonical names of the built-in PostgreSQL types
 // this package recognizes. Only canonical spellings are listed; aliases such as
 // "int4" or "bool" are deliberately absent.
 var knownPGBaseTypes = map[string]struct{}{
 	// whole numbers
 	"smallint": {},
 	"integer":  {},
 	"bigint":   {},
 	// auto-incrementing whole numbers
 	"smallserial": {},
 	"serial":      {},
 	"bigserial":   {},
 	// fractional and monetary numbers
 	"real":             {},
 	"double precision": {},
 	"numeric":          {},
 	"money":            {},
 	// character data
 	"char":    {},
 	"varchar": {},
 	"text":    {},
 	"citext":  {},
 	// truth values
 	"boolean": {},
 	// dates, times and durations
 	"date":        {},
 	"time":        {},
 	"timetz":      {},
 	"timestamp":   {},
 	"timestamptz": {},
 	"interval":    {},
 	// identifiers, documents and raw bytes
 	"uuid":  {},
 	"json":  {},
 	"jsonb": {},
 	"xml":   {},
 	"bytea": {},
 	// network addresses
 	"inet":    {},
 	"cidr":    {},
 	"macaddr": {},
 }
 // NormalizePGType resolves a PostgreSQL base type name (without dimension
 // parameters) to its canonical spelling. The input is trimmed of surrounding
 // whitespace and lowercased before the alias lookup; a name with no entry in
 // PGTypeCanonical comes back in that trimmed, lowercased form.
 func NormalizePGType(baseType string) string {
 	key := strings.TrimSpace(baseType)
 	key = strings.ToLower(key)
 	canonical, isAlias := PGTypeCanonical[key]
 	if !isAlias {
 		return key
 	}
 	return canonical
 }
 // IsKnownPGBaseType reports whether baseType names a recognized built-in
 // PostgreSQL type. The name is trimmed and lowercased, then looked up among the
 // canonical names in knownPGBaseTypes. Aliases are not resolved here, so pass the
 // result of NormalizePGType. Custom types (e.g. vector, postgis) return false.
 func IsKnownPGBaseType(baseType string) bool {
 	name := strings.TrimSpace(baseType)
 	name = strings.ToLower(name)
 	if _, known := knownPGBaseTypes[name]; known {
 		return true
 	}
 	return false
 }
 func IsGoType(pTypeName string) bool {
 	for k := range GoToStdTypes {
 		if strings.EqualFold(pTypeName, k) {
@@ -259,12 +259,14 @@ func (r *Reader) close() {
 	}
 }
-// mapDataType maps PostgreSQL data types while preserving exact type text when available.
+// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
 // For known built-in types, dimensions are stripped from the type string (they are
 // stored separately in column.Length/Precision/Scale). For custom types (e.g.
 // vector(1536), postgis geometries), the full formatted type is preserved.
 func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
 	normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))
-	// If the column has a nextval default, it's likely a serial type
+	// Detect serial types from nextval defaults before anything else.
 	// Map to the appropriate serial type instead of the base integer type
 	if hasNextval {
 		switch normalizedPGType {
 		case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		}
 	}
 	// Prefer the database-provided formatted type; this preserves arrays/custom
 	// types/modifiers like text[], vector(1536), numeric(10,2), etc.
 	if strings.TrimSpace(formattedType) != "" {
 		return formattedType
 	}
 	// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
 	if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 		return udtName[1:] + "[]"
 	}
-	// Map common PostgreSQL types
+	// Use the database-formatted type when available. For known built-in types, strip
-	typeMap := map[string]string{
+	// embedded dimensions (they are stored in column.Length/Precision/Scale separately).
-		"integer":                     "integer",
+	// For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
-		"bigint":                      "bigint",
+	if strings.TrimSpace(formattedType) != "" {
-		"smallint":                    "smallint",
+		lower := strings.ToLower(strings.TrimSpace(formattedType))
-		"int":                         "integer",
+		isArray := strings.HasSuffix(lower, "[]")
-		"int2":                        "smallint",
+		base := strings.TrimSuffix(lower, "[]")
-		"int4":                        "integer",
+		if idx := strings.Index(base, "("); idx >= 0 {
-		"int8":                        "bigint",
+			base = strings.TrimSpace(base[:idx])
-		"serial":                      "serial",
+		}
-		"bigserial":                   "bigserial",
+		canonical := pgsql.NormalizePGType(base)
-		"smallserial":                 "smallserial",
+		if pgsql.IsKnownPGBaseType(canonical) {
-		"numeric":                     "numeric",
+			if isArray {
-		"decimal":                     "decimal",
+				return canonical + "[]"
-		"real":                        "real",
+			}
-		"double precision":            "double precision",
+			return canonical
-		"float4":                      "real",
+		}
-		"float8":                      "double precision",
+		return formattedType
 		"money":                       "money",
 		"character varying":           "varchar",
 		"varchar":                     "varchar",
 		"character":                   "char",
 		"char":                        "char",
 		"text":                        "text",
 		"boolean":                     "boolean",
 		"bool":                        "boolean",
 		"date":                        "date",
 		"time":                        "time",
 		"time without time zone":      "time",
 		"time with time zone":         "timetz",
 		"timestamp":                   "timestamp",
 		"timestamp without time zone": "timestamp",
 		"timestamp with time zone":    "timestamptz",
 		"timestamptz":                 "timestamptz",
 		"interval":                    "interval",
 		"uuid":                        "uuid",
 		"json":                        "json",
 		"jsonb":                       "jsonb",
 		"bytea":                       "bytea",
 		"inet":                        "inet",
 		"cidr":                        "cidr",
 		"macaddr":                     "macaddr",
 		"xml":                         "xml",
 	}
-	// Try mapped type first
+	// Fall back to normalizing the information_schema type name directly.
-	if mapped, exists := typeMap[normalizedPGType]; exists {
+	canonical := pgsql.NormalizePGType(normalizedPGType)
-		return mapped
+	if pgsql.IsKnownPGBaseType(canonical) {
 		return canonical
 	}
-	// Use pgsql utilities if available
+	// Return UDT name for custom types.
 	if pgsql.ValidSQLType(pgType) {
 		return pgsql.GetSQLType(pgType)
 	}
 	// Return UDT name for custom types (including array fallback when needed)
 	if udtName != "" {
 		if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 			return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		return udtName
 	}
 	// Default to the original type
 	return pgType
 }
@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
 		{"unknown_type", "custom", "", "custom"}, // Should return UDT name
 		{"ARRAY", "_text", "", "text[]"},
 		{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
-		{"character varying", "varchar", "character varying(255)", "character varying(255)"},
+		{"character varying", "varchar", "character varying(255)", "varchar"},
 	}
 	for _, tt := range tests {
@@ -5,6 +5,8 @@ import (
 	"regexp"
 	"strings"
 	"unicode"
 	"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
 )
 // TemplateFunctions returns a map of custom template functions
@@ -162,24 +164,12 @@ func quoteIdent(s string) string {
 // Type conversion functions
-// goTypeToSQL converts Go type to PostgreSQL type
+// goTypeToSQL converts Go type to PostgreSQL type using the shared pgsql type map.
 func goTypeToSQL(goType string) string {
-	typeMap := map[string]string{
+	if sqlType, ok := pgsql.GoToPGSQLTypes[goType]; ok {
 		"string":    "text",
 		"int":       "integer",
 		"int32":     "integer",
 		"int64":     "bigint",
 		"float32":   "real",
 		"float64":   "double precision",
 		"bool":      "boolean",
 		"time.Time": "timestamp",
 		"[]byte":    "bytea",
 	}
 	if sqlType, ok := typeMap[goType]; ok {
 		return sqlType
 	}
-	return "text" // Default
+	return "text"
 }
 // sqlTypeToGo converts PostgreSQL type to Go type
@@ -2,6 +2,8 @@ package sqlite
 import (
 	"strings"
 	"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
 )
 // SQLite type affinities
@@ -15,45 +17,29 @@ const (
 // MapPostgreSQLType maps PostgreSQL data types to SQLite type affinities
 func MapPostgreSQLType(pgType string) string {
 	// Normalize the type
 	normalized := strings.ToLower(strings.TrimSpace(pgType))
 	// Remove array notation if present
 	normalized = strings.TrimSuffix(normalized, "[]")
 	// Remove precision/scale if present
 	if idx := strings.Index(normalized, "("); idx != -1 {
 		normalized = normalized[:idx]
 	}
 	// Resolve synonyms to canonical form before mapping
 	normalized = pgsql.NormalizePGType(normalized)
 	// Map to SQLite type affinity
 	switch normalized {
-	// TEXT affinity
+	case "varchar", "char", "text", "citext", "uuid",
-	case "varchar", "character varying", "text", "char", "character",
+		"timestamp", "timestamptz", "date", "time", "timetz",
-		"citext", "uuid", "timestamp", "timestamptz", "timestamp with time zone",
+		"json", "jsonb", "xml", "inet", "cidr", "macaddr":
 		"timestamp without time zone", "date", "time", "timetz", "time with time zone",
 		"time without time zone", "json", "jsonb", "xml", "inet", "cidr", "macaddr":
 		return TypeText
-
+	case "integer", "smallint", "bigint",
-	// INTEGER affinity
+		"serial", "smallserial", "bigserial", "boolean":
 	case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
 		"serial", "smallserial", "bigserial", "boolean", "bool":
 		return TypeInteger
-
+	case "real", "float", "double precision":
 	// REAL affinity
 	case "real", "float", "float4", "float8", "double precision":
 		return TypeReal
-
+	case "numeric", "money":
 	// NUMERIC affinity
 	case "numeric", "decimal", "money":
 		return TypeNumeric
 	// BLOB affinity
 	case "bytea", "blob":
 		return TypeBlob
 	default:
 		// Default to TEXT for unknown types
 		return TypeText
 	}
 }
@@ -65,10 +51,9 @@ func IsIntegerType(colType string) bool {
 	if idx := strings.Index(normalized, "("); idx != -1 {
 		normalized = normalized[:idx]
 	}
-
+	normalized = pgsql.NormalizePGType(normalized)
 	switch normalized {
-	case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
+	case "integer", "smallint", "bigint", "serial", "smallserial", "bigserial":
 		"serial", "smallserial", "bigserial":
 		return true
 	default:
 		return false