feat(merge): enhance type conflict detection for columns
* Introduced extractTypeParts function to handle embedded dimensions in type strings. * Updated columnTypeConflict to utilize new type extraction logic. * Improved PostgreSQL type normalization and handling in various components.
This commit is contained in:
+28
-62
@@ -259,12 +259,14 @@ func (r *Reader) close() {
|
||||
}
|
||||
}
|
||||
|
||||
// mapDataType maps PostgreSQL data types while preserving exact type text when available.
|
||||
// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
|
||||
// For known built-in types, dimensions are stripped from the type string (they are
|
||||
// stored separately in column.Length/Precision/Scale). For custom types (e.g.
|
||||
// vector(1536), postgis geometries), the full formatted type is preserved.
|
||||
func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
|
||||
normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))
|
||||
|
||||
// If the column has a nextval default, it's likely a serial type
|
||||
// Map to the appropriate serial type instead of the base integer type
|
||||
// Detect serial types from nextval defaults before anything else.
|
||||
if hasNextval {
|
||||
switch normalizedPGType {
|
||||
case "integer", "int", "int4":
|
||||
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
|
||||
}
|
||||
}
|
||||
|
||||
// Prefer the database-provided formatted type; this preserves arrays/custom
|
||||
// types/modifiers like text[], vector(1536), numeric(10,2), etc.
|
||||
if strings.TrimSpace(formattedType) != "" {
|
||||
return formattedType
|
||||
}
|
||||
|
||||
// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
|
||||
if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
|
||||
return udtName[1:] + "[]"
|
||||
}
|
||||
|
||||
// Map common PostgreSQL types
|
||||
typeMap := map[string]string{
|
||||
"integer": "integer",
|
||||
"bigint": "bigint",
|
||||
"smallint": "smallint",
|
||||
"int": "integer",
|
||||
"int2": "smallint",
|
||||
"int4": "integer",
|
||||
"int8": "bigint",
|
||||
"serial": "serial",
|
||||
"bigserial": "bigserial",
|
||||
"smallserial": "smallserial",
|
||||
"numeric": "numeric",
|
||||
"decimal": "decimal",
|
||||
"real": "real",
|
||||
"double precision": "double precision",
|
||||
"float4": "real",
|
||||
"float8": "double precision",
|
||||
"money": "money",
|
||||
"character varying": "varchar",
|
||||
"varchar": "varchar",
|
||||
"character": "char",
|
||||
"char": "char",
|
||||
"text": "text",
|
||||
"boolean": "boolean",
|
||||
"bool": "boolean",
|
||||
"date": "date",
|
||||
"time": "time",
|
||||
"time without time zone": "time",
|
||||
"time with time zone": "timetz",
|
||||
"timestamp": "timestamp",
|
||||
"timestamp without time zone": "timestamp",
|
||||
"timestamp with time zone": "timestamptz",
|
||||
"timestamptz": "timestamptz",
|
||||
"interval": "interval",
|
||||
"uuid": "uuid",
|
||||
"json": "json",
|
||||
"jsonb": "jsonb",
|
||||
"bytea": "bytea",
|
||||
"inet": "inet",
|
||||
"cidr": "cidr",
|
||||
"macaddr": "macaddr",
|
||||
"xml": "xml",
|
||||
// Use the database-formatted type when available. For known built-in types, strip
|
||||
// embedded dimensions (they are stored in column.Length/Precision/Scale separately).
|
||||
// For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
|
||||
if strings.TrimSpace(formattedType) != "" {
|
||||
lower := strings.ToLower(strings.TrimSpace(formattedType))
|
||||
isArray := strings.HasSuffix(lower, "[]")
|
||||
base := strings.TrimSuffix(lower, "[]")
|
||||
if idx := strings.Index(base, "("); idx >= 0 {
|
||||
base = strings.TrimSpace(base[:idx])
|
||||
}
|
||||
canonical := pgsql.NormalizePGType(base)
|
||||
if pgsql.IsKnownPGBaseType(canonical) {
|
||||
if isArray {
|
||||
return canonical + "[]"
|
||||
}
|
||||
return canonical
|
||||
}
|
||||
return formattedType
|
||||
}
|
||||
|
||||
// Try mapped type first
|
||||
if mapped, exists := typeMap[normalizedPGType]; exists {
|
||||
return mapped
|
||||
// Fall back to normalizing the information_schema type name directly.
|
||||
canonical := pgsql.NormalizePGType(normalizedPGType)
|
||||
if pgsql.IsKnownPGBaseType(canonical) {
|
||||
return canonical
|
||||
}
|
||||
|
||||
// Use pgsql utilities if available
|
||||
if pgsql.ValidSQLType(pgType) {
|
||||
return pgsql.GetSQLType(pgType)
|
||||
}
|
||||
|
||||
// Return UDT name for custom types (including array fallback when needed)
|
||||
// Return UDT name for custom types.
|
||||
if udtName != "" {
|
||||
if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
|
||||
return udtName[1:] + "[]"
|
||||
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
|
||||
return udtName
|
||||
}
|
||||
|
||||
// Default to the original type
|
||||
return pgType
|
||||
}
|
||||
|
||||
|
||||
@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
|
||||
{"unknown_type", "custom", "", "custom"}, // Should return UDT name
|
||||
{"ARRAY", "_text", "", "text[]"},
|
||||
{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
|
||||
{"character varying", "varchar", "character varying(255)", "character varying(255)"},
|
||||
{"character varying", "varchar", "character varying(255)", "varchar"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
Reference in New Issue
Block a user