feat(merge): enhance type conflict detection for columns

* Introduced extractTypeParts function to handle embedded dimensions in type strings.
* Updated columnTypeConflict to utilize new type extraction logic.
* Improved PostgreSQL type normalization and handling in various components.
This commit is contained in:
2026-05-19 19:12:27 +02:00
parent 9235ef5e08
commit 9190df81dd
6 changed files with 141 additions and 110 deletions
+28 -62
View File
@@ -259,12 +259,14 @@ func (r *Reader) close() {
}
}
// mapDataType maps PostgreSQL data types while preserving exact type text when available.
// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
// For known built-in types, dimensions are stripped from the type string (they are
// stored separately in column.Length/Precision/Scale). For custom types (e.g.
// vector(1536), postgis geometries), the full formatted type is preserved.
func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))
// If the column has a nextval default, it's likely a serial type
// Map to the appropriate serial type instead of the base integer type
// Detect serial types from nextval defaults before anything else.
if hasNextval {
switch normalizedPGType {
case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
}
}
// Prefer the database-provided formatted type; this preserves arrays/custom
// types/modifiers like text[], vector(1536), numeric(10,2), etc.
if strings.TrimSpace(formattedType) != "" {
return formattedType
}
// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
return udtName[1:] + "[]"
}
// Map common PostgreSQL types
typeMap := map[string]string{
"integer": "integer",
"bigint": "bigint",
"smallint": "smallint",
"int": "integer",
"int2": "smallint",
"int4": "integer",
"int8": "bigint",
"serial": "serial",
"bigserial": "bigserial",
"smallserial": "smallserial",
"numeric": "numeric",
"decimal": "decimal",
"real": "real",
"double precision": "double precision",
"float4": "real",
"float8": "double precision",
"money": "money",
"character varying": "varchar",
"varchar": "varchar",
"character": "char",
"char": "char",
"text": "text",
"boolean": "boolean",
"bool": "boolean",
"date": "date",
"time": "time",
"time without time zone": "time",
"time with time zone": "timetz",
"timestamp": "timestamp",
"timestamp without time zone": "timestamp",
"timestamp with time zone": "timestamptz",
"timestamptz": "timestamptz",
"interval": "interval",
"uuid": "uuid",
"json": "json",
"jsonb": "jsonb",
"bytea": "bytea",
"inet": "inet",
"cidr": "cidr",
"macaddr": "macaddr",
"xml": "xml",
// Use the database-formatted type when available. For known built-in types, strip
// embedded dimensions (they are stored in column.Length/Precision/Scale separately).
// For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
if strings.TrimSpace(formattedType) != "" {
lower := strings.ToLower(strings.TrimSpace(formattedType))
isArray := strings.HasSuffix(lower, "[]")
base := strings.TrimSuffix(lower, "[]")
if idx := strings.Index(base, "("); idx >= 0 {
base = strings.TrimSpace(base[:idx])
}
canonical := pgsql.NormalizePGType(base)
if pgsql.IsKnownPGBaseType(canonical) {
if isArray {
return canonical + "[]"
}
return canonical
}
return formattedType
}
// Try mapped type first
if mapped, exists := typeMap[normalizedPGType]; exists {
return mapped
// Fall back to normalizing the information_schema type name directly.
canonical := pgsql.NormalizePGType(normalizedPGType)
if pgsql.IsKnownPGBaseType(canonical) {
return canonical
}
// Use pgsql utilities if available
if pgsql.ValidSQLType(pgType) {
return pgsql.GetSQLType(pgType)
}
// Return UDT name for custom types (including array fallback when needed)
// Return UDT name for custom types.
if udtName != "" {
if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
return udtName
}
// Default to the original type
return pgType
}
+1 -1
View File
@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
{"unknown_type", "custom", "", "custom"}, // Should return UDT name
{"ARRAY", "_text", "", "text[]"},
{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
{"character varying", "varchar", "character varying(255)", "character varying(255)"},
{"character varying", "varchar", "character varying(255)", "varchar"},
}
for _, tt := range tests {