feat(merge): enhance type conflict detection for columns

* Introduced the extractTypeParts function to handle dimensions embedded in type strings.
* Updated columnTypeConflict to use the new type extraction logic.
* Improved PostgreSQL type normalization and handling in various components.
2026-05-19 19:12:27 +02:00
parent 9235ef5e08
commit 9190df81dd
6 changed files with 141 additions and 110 deletions
@@ -5,9 +5,11 @@ package merge

 import (
 	"fmt"
+	"strconv"
 	"strings"

 	"git.warky.dev/wdevs/relspecgo/pkg/models"
+	"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
 )

 // MergeResult represents the result of a merge operation
@@ -449,10 +451,40 @@ func columnTypeConflict(target, source *models.Column) bool {
 		return false
 	}

-	return normalizeType(target.Type) != normalizeType(source.Type) ||
-		target.Length != source.Length ||
-		target.Precision != source.Precision ||
-		target.Scale != source.Scale
+	tType, tLen, tPrec, tScale := extractTypeParts(target)
+	sType, sLen, sPrec, sScale := extractTypeParts(source)
+
+	return tType != sType || tLen != sLen || tPrec != sPrec || tScale != sScale
+}
+
+// extractTypeParts returns the canonical base type and dimensions for a column,
+// handling the case where dimensions are embedded in the type string (e.g. "char(2)")
+// rather than stored in the separate Length/Precision/Scale fields.
+func extractTypeParts(col *models.Column) (baseType string, length, precision, scale int) {
+	typeName := strings.ToLower(strings.TrimSpace(col.Type))
+	length, precision, scale = col.Length, col.Precision, col.Scale
+
+	if idx := strings.Index(typeName, "("); idx >= 0 {
+		inner := strings.TrimRight(strings.TrimSpace(typeName[idx+1:]), ")")
+		typeName = strings.TrimSpace(typeName[:idx])
+		parts := strings.Split(inner, ",")
+		if len(parts) == 2 {
+			if p, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && p > 0 && precision == 0 {
+				precision = p
+			}
+			if s, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil && s > 0 && scale == 0 {
+				scale = s
+			}
+		} else if len(parts) == 1 {
+			if l, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && l > 0 && length == 0 && precision == 0 {
+				length = l
+			}
+		}
+	}
+
+	typeName = pgsql.NormalizePGType(typeName)
+
+	return typeName, length, precision, scale
 }

 func normalizeType(value string) string {
@@ -45,6 +45,7 @@ var GoToStdTypes = map[string]string{
 	"sqldate":         "date",
 	"sqltime":         "time",
 	"sqltimestamp":    "timestamp",
+	"time.Time":       "timestamp",
 }

 var GoToPGSQLTypes = map[string]string{
@@ -90,6 +91,7 @@ var GoToPGSQLTypes = map[string]string{
 	"sqldate":         "date",
 	"sqltime":         "time",
 	"sqltimestamp":    "timestamp",
+	"time.Time":       "timestamp",
 	"citext":          "citext",
 }

@@ -135,6 +137,62 @@ func ConvertSQLType(anytype string) string {
 	return anytype
 }

+// PGTypeCanonical maps PostgreSQL type aliases and synonyms to their canonical base name.
+// Input should be a base type (no dimension parameters, lowercase).
+var PGTypeCanonical = map[string]string{
+	// integer aliases
+	"int":  "integer",
+	"int4": "integer",
+	"int2": "smallint",
+	"int8": "bigint",
+	// float aliases
+	"float4": "real",
+	"float8": "double precision",
+	// bool alias
+	"bool": "boolean",
+	// char aliases
+	"character":         "char",
+	"character varying": "varchar",
+	"bpchar":            "char",
+	// timestamp aliases
+	"timestamp without time zone": "timestamp",
+	"timestamp with time zone":    "timestamptz",
+	// time aliases
+	"time without time zone": "time",
+	"time with time zone":    "timetz",
+	// decimal alias
+	"decimal": "numeric",
+}
+
+// knownPGBaseTypes is the set of canonical PostgreSQL base types (no aliases).
+var knownPGBaseTypes = map[string]struct{}{
+	"integer": {}, "bigint": {}, "smallint": {},
+	"serial": {}, "bigserial": {}, "smallserial": {},
+	"numeric": {}, "real": {}, "double precision": {}, "money": {},
+	"varchar": {}, "char": {}, "text": {}, "citext": {},
+	"boolean": {},
+	"date": {}, "time": {}, "timetz": {}, "timestamp": {}, "timestamptz": {}, "interval": {},
+	"uuid": {}, "json": {}, "jsonb": {}, "bytea": {},
+	"inet": {}, "cidr": {}, "macaddr": {}, "xml": {},
+}
+
+// NormalizePGType maps a PostgreSQL base type (no dimension parameters) to its
+// canonical form. Unknown types are returned as-is (lowercased).
+func NormalizePGType(baseType string) string {
+	lower := strings.ToLower(strings.TrimSpace(baseType))
+	if canonical, ok := PGTypeCanonical[lower]; ok {
+		return canonical
+	}
+	return lower
+}
+
+// IsKnownPGBaseType reports whether baseType (already canonicalized by the caller via
+// NormalizePGType) is a built-in PostgreSQL type. Aliases (e.g. int4) and custom types (e.g. vector) return false.
+func IsKnownPGBaseType(baseType string) bool {
+	_, ok := knownPGBaseTypes[strings.ToLower(strings.TrimSpace(baseType))]
+	return ok
+}
+
 func IsGoType(pTypeName string) bool {
 	for k := range GoToStdTypes {
 		if strings.EqualFold(pTypeName, k) {
@@ -259,12 +259,14 @@ func (r *Reader) close() {
 	}
 }

-// mapDataType maps PostgreSQL data types while preserving exact type text when available.
+// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
+// For known built-in types, dimensions are stripped from the type string (they are
+// stored separately in column.Length/Precision/Scale). For custom types (e.g.
+// vector(1536), postgis geometries), the full formatted type is preserved.
 func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
 	normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))

-	// If the column has a nextval default, it's likely a serial type
-	// Map to the appropriate serial type instead of the base integer type
+	// Detect serial types from nextval defaults before anything else.
 	if hasNextval {
 		switch normalizedPGType {
 		case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		}
 	}

-	// Prefer the database-provided formatted type; this preserves arrays/custom
-	// types/modifiers like text[], vector(1536), numeric(10,2), etc.
-	if strings.TrimSpace(formattedType) != "" {
-		return formattedType
-	}
-
 	// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
 	if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 		return udtName[1:] + "[]"
 	}

-	// Map common PostgreSQL types
-	typeMap := map[string]string{
-		"integer":                     "integer",
-		"bigint":                      "bigint",
-		"smallint":                    "smallint",
-		"int":                         "integer",
-		"int2":                        "smallint",
-		"int4":                        "integer",
-		"int8":                        "bigint",
-		"serial":                      "serial",
-		"bigserial":                   "bigserial",
-		"smallserial":                 "smallserial",
-		"numeric":                     "numeric",
-		"decimal":                     "decimal",
-		"real":                        "real",
-		"double precision":            "double precision",
-		"float4":                      "real",
-		"float8":                      "double precision",
-		"money":                       "money",
-		"character varying":           "varchar",
-		"varchar":                     "varchar",
-		"character":                   "char",
-		"char":                        "char",
-		"text":                        "text",
-		"boolean":                     "boolean",
-		"bool":                        "boolean",
-		"date":                        "date",
-		"time":                        "time",
-		"time without time zone":      "time",
-		"time with time zone":         "timetz",
-		"timestamp":                   "timestamp",
-		"timestamp without time zone": "timestamp",
-		"timestamp with time zone":    "timestamptz",
-		"timestamptz":                 "timestamptz",
-		"interval":                    "interval",
-		"uuid":                        "uuid",
-		"json":                        "json",
-		"jsonb":                       "jsonb",
-		"bytea":                       "bytea",
-		"inet":                        "inet",
-		"cidr":                        "cidr",
-		"macaddr":                     "macaddr",
-		"xml":                         "xml",
+	// Use the database-formatted type when available. For known built-in types, strip
+	// embedded dimensions (they are stored in column.Length/Precision/Scale separately).
+	// For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
+	if strings.TrimSpace(formattedType) != "" {
+		lower := strings.ToLower(strings.TrimSpace(formattedType))
+		isArray := strings.HasSuffix(lower, "[]")
+		base := strings.TrimSuffix(lower, "[]")
+		if idx := strings.Index(base, "("); idx >= 0 {
+			base = strings.TrimSpace(base[:idx])
+		}
+		canonical := pgsql.NormalizePGType(base)
+		if pgsql.IsKnownPGBaseType(canonical) {
+			if isArray {
+				return canonical + "[]"
+			}
+			return canonical
+		}
+		return formattedType
 	}

-	// Try mapped type first
-	if mapped, exists := typeMap[normalizedPGType]; exists {
-		return mapped
+	// Fall back to normalizing the information_schema type name directly.
+	canonical := pgsql.NormalizePGType(normalizedPGType)
+	if pgsql.IsKnownPGBaseType(canonical) {
+		return canonical
 	}

-	// Use pgsql utilities if available
-	if pgsql.ValidSQLType(pgType) {
-		return pgsql.GetSQLType(pgType)
-	}
-
-	// Return UDT name for custom types (including array fallback when needed)
+	// Return UDT name for custom types.
 	if udtName != "" {
 		if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 			return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		return udtName
 	}

-	// Default to the original type
 	return pgType
 }

@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
 		{"unknown_type", "custom", "", "custom"}, // Should return UDT name
 		{"ARRAY", "_text", "", "text[]"},
 		{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
-		{"character varying", "varchar", "character varying(255)", "character varying(255)"},
+		{"character varying", "varchar", "character varying(255)", "varchar"},
 	}

 	for _, tt := range tests {
@@ -5,6 +5,8 @@ import (
 	"regexp"
 	"strings"
 	"unicode"
+
+	"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
 )

 // TemplateFunctions returns a map of custom template functions
@@ -162,24 +164,12 @@ func quoteIdent(s string) string {

 // Type conversion functions

-// goTypeToSQL converts Go type to PostgreSQL type
+// goTypeToSQL converts Go type to PostgreSQL type using the shared pgsql type map.
 func goTypeToSQL(goType string) string {
-	typeMap := map[string]string{
-		"string":    "text",
-		"int":       "integer",
-		"int32":     "integer",
-		"int64":     "bigint",
-		"float32":   "real",
-		"float64":   "double precision",
-		"bool":      "boolean",
-		"time.Time": "timestamp",
-		"[]byte":    "bytea",
-	}
-
-	if sqlType, ok := typeMap[goType]; ok {
+	if sqlType, ok := pgsql.GoToPGSQLTypes[goType]; ok {
 		return sqlType
 	}
-	return "text" // Default
+	return "text"
 }

 // sqlTypeToGo converts PostgreSQL type to Go type
@@ -2,6 +2,8 @@ package sqlite

 import (
 	"strings"
+
+	"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
 )

 // SQLite type affinities
@@ -15,45 +17,29 @@ const (

 // MapPostgreSQLType maps PostgreSQL data types to SQLite type affinities
 func MapPostgreSQLType(pgType string) string {
-	// Normalize the type
 	normalized := strings.ToLower(strings.TrimSpace(pgType))
-
-	// Remove array notation if present
 	normalized = strings.TrimSuffix(normalized, "[]")
-
-	// Remove precision/scale if present
 	if idx := strings.Index(normalized, "("); idx != -1 {
 		normalized = normalized[:idx]
 	}
+	// Resolve synonyms to canonical form before mapping
+	normalized = pgsql.NormalizePGType(normalized)

-	// Map to SQLite type affinity
 	switch normalized {
-	// TEXT affinity
-	case "varchar", "character varying", "text", "char", "character",
-		"citext", "uuid", "timestamp", "timestamptz", "timestamp with time zone",
-		"timestamp without time zone", "date", "time", "timetz", "time with time zone",
-		"time without time zone", "json", "jsonb", "xml", "inet", "cidr", "macaddr":
+	case "varchar", "char", "text", "citext", "uuid",
+		"timestamp", "timestamptz", "date", "time", "timetz",
+		"json", "jsonb", "xml", "inet", "cidr", "macaddr":
 		return TypeText
-
-	// INTEGER affinity
-	case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
-		"serial", "smallserial", "bigserial", "boolean", "bool":
+	case "integer", "smallint", "bigint",
+		"serial", "smallserial", "bigserial", "boolean":
 		return TypeInteger
-
-	// REAL affinity
-	case "real", "float", "float4", "float8", "double precision":
+	case "real", "float", "double precision":
 		return TypeReal
-
-	// NUMERIC affinity
-	case "numeric", "decimal", "money":
+	case "numeric", "money":
 		return TypeNumeric
-
-	// BLOB affinity
 	case "bytea", "blob":
 		return TypeBlob
-
 	default:
-		// Default to TEXT for unknown types
 		return TypeText
 	}
 }
@@ -65,10 +51,9 @@ func IsIntegerType(colType string) bool {
 	if idx := strings.Index(normalized, "("); idx != -1 {
 		normalized = normalized[:idx]
 	}
-
+	normalized = pgsql.NormalizePGType(normalized)
 	switch normalized {
-	case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
-		"serial", "smallserial", "bigserial":
+	case "integer", "smallint", "bigint", "serial", "smallserial", "bigserial":
 		return true
 	default:
 		return false