feat(merge): enhance type conflict detection for columns

* Introduced extractTypeParts function to handle embedded dimensions in type strings.
* Updated columnTypeConflict to utilize new type extraction logic.
* Improved PostgreSQL type normalization and handling in various components.
2026-05-19 19:12:27 +02:00
parent 9235ef5e08
commit 9190df81dd
6 changed files with 141 additions and 110 deletions
@@ -259,12 +259,14 @@ func (r *Reader) close() {
 	}
 }

-// mapDataType maps PostgreSQL data types while preserving exact type text when available.
+// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
+// For known built-in types, dimensions are stripped from the type string (they are
+// stored separately in column.Length/Precision/Scale). For custom types (e.g.
+// vector(1536), postgis geometries), the full formatted type is preserved.
 func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
 	normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))

-	// If the column has a nextval default, it's likely a serial type
-	// Map to the appropriate serial type instead of the base integer type
+	// Detect serial types from nextval defaults before anything else.
 	if hasNextval {
 		switch normalizedPGType {
 		case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		}
 	}

-	// Prefer the database-provided formatted type; this preserves arrays/custom
-	// types/modifiers like text[], vector(1536), numeric(10,2), etc.
-	if strings.TrimSpace(formattedType) != "" {
-		return formattedType
-	}
-
 	// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
 	if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 		return udtName[1:] + "[]"
 	}

-	// Map common PostgreSQL types
-	typeMap := map[string]string{
-		"integer":                     "integer",
-		"bigint":                      "bigint",
-		"smallint":                    "smallint",
-		"int":                         "integer",
-		"int2":                        "smallint",
-		"int4":                        "integer",
-		"int8":                        "bigint",
-		"serial":                      "serial",
-		"bigserial":                   "bigserial",
-		"smallserial":                 "smallserial",
-		"numeric":                     "numeric",
-		"decimal":                     "decimal",
-		"real":                        "real",
-		"double precision":            "double precision",
-		"float4":                      "real",
-		"float8":                      "double precision",
-		"money":                       "money",
-		"character varying":           "varchar",
-		"varchar":                     "varchar",
-		"character":                   "char",
-		"char":                        "char",
-		"text":                        "text",
-		"boolean":                     "boolean",
-		"bool":                        "boolean",
-		"date":                        "date",
-		"time":                        "time",
-		"time without time zone":      "time",
-		"time with time zone":         "timetz",
-		"timestamp":                   "timestamp",
-		"timestamp without time zone": "timestamp",
-		"timestamp with time zone":    "timestamptz",
-		"timestamptz":                 "timestamptz",
-		"interval":                    "interval",
-		"uuid":                        "uuid",
-		"json":                        "json",
-		"jsonb":                       "jsonb",
-		"bytea":                       "bytea",
-		"inet":                        "inet",
-		"cidr":                        "cidr",
-		"macaddr":                     "macaddr",
-		"xml":                         "xml",
+	// Use the database-formatted type when available. For known built-in types, strip
+	// embedded dimensions (they are stored in column.Length/Precision/Scale separately).
+	// For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
+	if strings.TrimSpace(formattedType) != "" {
+		lower := strings.ToLower(strings.TrimSpace(formattedType))
+		isArray := strings.HasSuffix(lower, "[]")
+		base := strings.TrimSuffix(lower, "[]")
+		if idx := strings.Index(base, "("); idx >= 0 {
+			base = strings.TrimSpace(base[:idx])
+		}
+		canonical := pgsql.NormalizePGType(base)
+		if pgsql.IsKnownPGBaseType(canonical) {
+			if isArray {
+				return canonical + "[]"
+			}
+			return canonical
+		}
+		return formattedType
 	}

-	// Try mapped type first
-	if mapped, exists := typeMap[normalizedPGType]; exists {
-		return mapped
+	// Fall back to normalizing the information_schema type name directly.
+	canonical := pgsql.NormalizePGType(normalizedPGType)
+	if pgsql.IsKnownPGBaseType(canonical) {
+		return canonical
 	}

-	// Use pgsql utilities if available
-	if pgsql.ValidSQLType(pgType) {
-		return pgsql.GetSQLType(pgType)
-	}
-
-	// Return UDT name for custom types (including array fallback when needed)
+	// Return UDT name for custom types.
 	if udtName != "" {
 		if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
 			return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
 		return udtName
 	}

-	// Default to the original type
 	return pgType
 }

@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
 		{"unknown_type", "custom", "", "custom"}, // Should return UDT name
 		{"ARRAY", "_text", "", "text[]"},
 		{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
-		{"character varying", "varchar", "character varying(255)", "character varying(255)"},
+		{"character varying", "varchar", "character varying(255)", "varchar"},
 	}

 	for _, tt := range tests {