feat(merge): enhance type conflict detection for columns

* Introduced extractTypeParts function to handle embedded dimensions in type strings.
* Updated columnTypeConflict to utilize new type extraction logic.
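* Centralized PostgreSQL type normalization in pkg/pgsql (PGTypeCanonical, NormalizePGType, IsKnownPGBaseType) and reused it in the PostgreSQL reader (mapDataType), the template helper goTypeToSQL, and the SQLite type mapping (MapPostgreSQLType, IsIntegerType).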
This commit is contained in:
2026-05-19 19:12:27 +02:00
parent 9235ef5e08
commit 9190df81dd
6 changed files with 141 additions and 110 deletions
+36 -4
View File
@@ -5,9 +5,11 @@ package merge
import (
"fmt"
"strconv"
"strings"
"git.warky.dev/wdevs/relspecgo/pkg/models"
"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
)
// MergeResult represents the result of a merge operation
@@ -449,10 +451,40 @@ func columnTypeConflict(target, source *models.Column) bool {
return false
}
return normalizeType(target.Type) != normalizeType(source.Type) ||
target.Length != source.Length ||
target.Precision != source.Precision ||
target.Scale != source.Scale
tType, tLen, tPrec, tScale := extractTypeParts(target)
sType, sLen, sPrec, sScale := extractTypeParts(source)
return tType != sType || tLen != sLen || tPrec != sPrec || tScale != sScale
}
// extractTypeParts returns the canonical base type and dimensions for a column,
// handling the case where dimensions are embedded in the type string (e.g. "char(2)")
// rather than stored in the separate Length/Precision/Scale fields.
//
// The type string is trimmed and lowercased. If it contains a parenthesised
// dimension list, that list is removed from the type name and parsed:
//   - two values "(p,s)" fill precision and scale,
//   - a single value "(n)" fills length.
//
// Values from the type string are only used when the corresponding explicit
// column field is zero, so explicit fields always win. Values that do not parse
// as positive integers are ignored.
//
// Anything that follows the closing parenthesis is kept as part of the type
// name, so "timestamp(3) with time zone" yields "timestamp with time zone"
// (then canonicalised) and "varchar(10)[]" stays an array type. Array markers
// ("[]") are split off before alias resolution and re-appended afterwards, so
// "int4[]" and "integer[]" produce the same base type.
func extractTypeParts(col *models.Column) (baseType string, length, precision, scale int) {
	typeName := strings.ToLower(strings.TrimSpace(col.Type))
	length, precision, scale = col.Length, col.Precision, col.Scale

	if open := strings.Index(typeName, "("); open >= 0 {
		// Split "name(inner)suffix". A missing ")" is tolerated: everything after
		// "(" is then treated as the dimension list.
		inner, suffix := typeName[open+1:], ""
		if end := strings.Index(inner, ")"); end >= 0 {
			inner, suffix = inner[:end], strings.TrimSpace(inner[end+1:])
		}

		typeName = strings.TrimSpace(typeName[:open])
		if suffix != "" {
			if strings.HasPrefix(suffix, "[") {
				// Array marker attaches directly: "varchar" + "[]".
				typeName += suffix
			} else {
				// Trailing words belong to the type name: "timestamp" + " with time zone".
				typeName += " " + suffix
			}
		}

		parts := strings.Split(inner, ",")
		switch len(parts) {
		case 2:
			if p, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && p > 0 && precision == 0 {
				precision = p
			}
			if s, err := strconv.Atoi(strings.TrimSpace(parts[1])); err == nil && s > 0 && scale == 0 {
				scale = s
			}
		case 1:
			// A lone dimension is treated as a length, and only when the column
			// carries neither an explicit length nor an explicit precision.
			if l, err := strconv.Atoi(strings.TrimSpace(parts[0])); err == nil && l > 0 && length == 0 && precision == 0 {
				length = l
			}
		}
	}

	// Resolve aliases on the element type, not on the "[]"-decorated string,
	// otherwise array types would never match an alias key.
	arraySuffix := ""
	for strings.HasSuffix(typeName, "[]") {
		typeName = strings.TrimSpace(strings.TrimSuffix(typeName, "[]"))
		arraySuffix += "[]"
	}

	return pgsql.NormalizePGType(typeName) + arraySuffix, length, precision, scale
}
func normalizeType(value string) string {
+58
View File
@@ -45,6 +45,7 @@ var GoToStdTypes = map[string]string{
"sqldate": "date",
"sqltime": "time",
"sqltimestamp": "timestamp",
"time.Time": "timestamp",
}
var GoToPGSQLTypes = map[string]string{
@@ -90,6 +91,7 @@ var GoToPGSQLTypes = map[string]string{
"sqldate": "date",
"sqltime": "time",
"sqltimestamp": "timestamp",
"time.Time": "timestamp",
"citext": "citext",
}
@@ -135,6 +137,62 @@ func ConvertSQLType(anytype string) string {
return anytype
}
// PGTypeCanonical is the alias table used to collapse PostgreSQL type synonyms
// onto a single canonical base name. Keys are expected to be lowercase base
// type names without dimension parameters; names that are already canonical
// are intentionally absent.
var PGTypeCanonical = map[string]string{
	// -> integer family
	"int2": "smallint",
	"int":  "integer",
	"int4": "integer",
	"int8": "bigint",

	// -> floating point
	"float4": "real",
	"float8": "double precision",

	// -> exact numeric
	"decimal": "numeric",

	// -> boolean
	"bool": "boolean",

	// -> character types
	"bpchar":            "char",
	"character":         "char",
	"character varying": "varchar",

	// -> date/time types
	"time without time zone":      "time",
	"time with time zone":         "timetz",
	"timestamp without time zone": "timestamp",
	"timestamp with time zone":    "timestamptz",
}
// knownPGBaseTypes lists the canonical names of the built-in PostgreSQL base
// types this package recognises. Only canonical spellings appear here; aliases
// such as "int4" are not members.
var knownPGBaseTypes = map[string]struct{}{
	// integers and auto-incrementing integers
	"smallint":    {},
	"integer":     {},
	"bigint":      {},
	"smallserial": {},
	"serial":      {},
	"bigserial":   {},

	// other numerics
	"numeric":          {},
	"real":             {},
	"double precision": {},
	"money":            {},

	// character data
	"char":    {},
	"varchar": {},
	"text":    {},
	"citext":  {},

	// boolean
	"boolean": {},

	// date and time
	"date":        {},
	"time":        {},
	"timetz":      {},
	"timestamp":   {},
	"timestamptz": {},
	"interval":    {},

	// identifiers, documents and binary data
	"uuid":  {},
	"json":  {},
	"jsonb": {},
	"bytea": {},
	"xml":   {},

	// network addresses
	"inet":    {},
	"cidr":    {},
	"macaddr": {},
}
// NormalizePGType resolves a PostgreSQL base type name (without dimension
// parameters) to its canonical spelling. The input is trimmed and lowercased
// before the alias lookup in PGTypeCanonical; a name with no alias entry is
// returned in that trimmed, lowercased form.
func NormalizePGType(baseType string) string {
	name := strings.ToLower(strings.TrimSpace(baseType))
	canonical, isAlias := PGTypeCanonical[name]
	if !isAlias {
		return name
	}
	return canonical
}
// IsKnownPGBaseType reports whether baseType is one of the recognized built-in
// PostgreSQL base types. The name is trimmed and lowercased, but aliases are
// not resolved here: callers should pass the result of NormalizePGType.
// Custom types (e.g. vector, postgis) return false.
func IsKnownPGBaseType(baseType string) bool {
	name := strings.TrimSpace(baseType)
	if _, known := knownPGBaseTypes[strings.ToLower(name)]; known {
		return true
	}
	return false
}
func IsGoType(pTypeName string) bool {
for k := range GoToStdTypes {
if strings.EqualFold(pTypeName, k) {
+28 -62
View File
@@ -259,12 +259,14 @@ func (r *Reader) close() {
}
}
// mapDataType maps PostgreSQL data types while preserving exact type text when available.
// mapDataType maps a PostgreSQL data type to its canonical RelSpec name.
// For known built-in types, dimensions are stripped from the type string (they are
// stored separately in column.Length/Precision/Scale). For custom types (e.g.
// vector(1536), postgis geometries), the full formatted type is preserved.
func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval bool) string {
normalizedPGType := strings.ToLower(strings.TrimSpace(pgType))
// If the column has a nextval default, it's likely a serial type
// Map to the appropriate serial type instead of the base integer type
// Detect serial types from nextval defaults before anything else.
if hasNextval {
switch normalizedPGType {
case "integer", "int", "int4":
@@ -276,73 +278,38 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
}
}
// Prefer the database-provided formatted type; this preserves arrays/custom
// types/modifiers like text[], vector(1536), numeric(10,2), etc.
if strings.TrimSpace(formattedType) != "" {
return formattedType
}
// information_schema reports arrays generically as "ARRAY" with udt_name like "_text".
if strings.EqualFold(pgType, "ARRAY") && strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
return udtName[1:] + "[]"
}
// Map common PostgreSQL types
typeMap := map[string]string{
"integer": "integer",
"bigint": "bigint",
"smallint": "smallint",
"int": "integer",
"int2": "smallint",
"int4": "integer",
"int8": "bigint",
"serial": "serial",
"bigserial": "bigserial",
"smallserial": "smallserial",
"numeric": "numeric",
"decimal": "decimal",
"real": "real",
"double precision": "double precision",
"float4": "real",
"float8": "double precision",
"money": "money",
"character varying": "varchar",
"varchar": "varchar",
"character": "char",
"char": "char",
"text": "text",
"boolean": "boolean",
"bool": "boolean",
"date": "date",
"time": "time",
"time without time zone": "time",
"time with time zone": "timetz",
"timestamp": "timestamp",
"timestamp without time zone": "timestamp",
"timestamp with time zone": "timestamptz",
"timestamptz": "timestamptz",
"interval": "interval",
"uuid": "uuid",
"json": "json",
"jsonb": "jsonb",
"bytea": "bytea",
"inet": "inet",
"cidr": "cidr",
"macaddr": "macaddr",
"xml": "xml",
// Use the database-formatted type when available. For known built-in types, strip
// embedded dimensions (they are stored in column.Length/Precision/Scale separately).
// For unknown/custom types, keep the full formatted string (e.g. vector(1536)).
if strings.TrimSpace(formattedType) != "" {
lower := strings.ToLower(strings.TrimSpace(formattedType))
isArray := strings.HasSuffix(lower, "[]")
base := strings.TrimSuffix(lower, "[]")
if idx := strings.Index(base, "("); idx >= 0 {
base = strings.TrimSpace(base[:idx])
}
canonical := pgsql.NormalizePGType(base)
if pgsql.IsKnownPGBaseType(canonical) {
if isArray {
return canonical + "[]"
}
return canonical
}
return formattedType
}
// Try mapped type first
if mapped, exists := typeMap[normalizedPGType]; exists {
return mapped
// Fall back to normalizing the information_schema type name directly.
canonical := pgsql.NormalizePGType(normalizedPGType)
if pgsql.IsKnownPGBaseType(canonical) {
return canonical
}
// Use pgsql utilities if available
if pgsql.ValidSQLType(pgType) {
return pgsql.GetSQLType(pgType)
}
// Return UDT name for custom types (including array fallback when needed)
// Return UDT name for custom types.
if udtName != "" {
if strings.HasPrefix(udtName, "_") && len(udtName) > 1 {
return udtName[1:] + "[]"
@@ -350,7 +317,6 @@ func (r *Reader) mapDataType(pgType, udtName, formattedType string, hasNextval b
return udtName
}
// Default to the original type
return pgType
}
+1 -1
View File
@@ -198,7 +198,7 @@ func TestMapDataType(t *testing.T) {
{"unknown_type", "custom", "", "custom"}, // Should return UDT name
{"ARRAY", "_text", "", "text[]"},
{"USER-DEFINED", "vector", "vector(1536)", "vector(1536)"},
{"character varying", "varchar", "character varying(255)", "character varying(255)"},
{"character varying", "varchar", "character varying(255)", "varchar"},
}
for _, tt := range tests {
+5 -15
View File
@@ -5,6 +5,8 @@ import (
"regexp"
"strings"
"unicode"
"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
)
// TemplateFunctions returns a map of custom template functions
@@ -162,24 +164,12 @@ func quoteIdent(s string) string {
// Type conversion functions
// goTypeToSQL converts Go type to PostgreSQL type
// goTypeToSQL converts Go type to PostgreSQL type using the shared pgsql type map.
func goTypeToSQL(goType string) string {
typeMap := map[string]string{
"string": "text",
"int": "integer",
"int32": "integer",
"int64": "bigint",
"float32": "real",
"float64": "double precision",
"bool": "boolean",
"time.Time": "timestamp",
"[]byte": "bytea",
}
if sqlType, ok := typeMap[goType]; ok {
if sqlType, ok := pgsql.GoToPGSQLTypes[goType]; ok {
return sqlType
}
return "text" // Default
return "text"
}
// sqlTypeToGo converts PostgreSQL type to Go type
+13 -28
View File
@@ -2,6 +2,8 @@ package sqlite
import (
"strings"
"git.warky.dev/wdevs/relspecgo/pkg/pgsql"
)
// SQLite type affinities
@@ -15,45 +17,29 @@ const (
// MapPostgreSQLType maps PostgreSQL data types to SQLite type affinities
func MapPostgreSQLType(pgType string) string {
// Normalize the type
normalized := strings.ToLower(strings.TrimSpace(pgType))
// Remove array notation if present
normalized = strings.TrimSuffix(normalized, "[]")
// Remove precision/scale if present
if idx := strings.Index(normalized, "("); idx != -1 {
normalized = normalized[:idx]
}
// Resolve synonyms to canonical form before mapping
normalized = pgsql.NormalizePGType(normalized)
// Map to SQLite type affinity
switch normalized {
// TEXT affinity
case "varchar", "character varying", "text", "char", "character",
"citext", "uuid", "timestamp", "timestamptz", "timestamp with time zone",
"timestamp without time zone", "date", "time", "timetz", "time with time zone",
"time without time zone", "json", "jsonb", "xml", "inet", "cidr", "macaddr":
case "varchar", "char", "text", "citext", "uuid",
"timestamp", "timestamptz", "date", "time", "timetz",
"json", "jsonb", "xml", "inet", "cidr", "macaddr":
return TypeText
// INTEGER affinity
case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
"serial", "smallserial", "bigserial", "boolean", "bool":
case "integer", "smallint", "bigint",
"serial", "smallserial", "bigserial", "boolean":
return TypeInteger
// REAL affinity
case "real", "float", "float4", "float8", "double precision":
case "real", "float", "double precision":
return TypeReal
// NUMERIC affinity
case "numeric", "decimal", "money":
case "numeric", "money":
return TypeNumeric
// BLOB affinity
case "bytea", "blob":
return TypeBlob
default:
// Default to TEXT for unknown types
return TypeText
}
}
@@ -65,10 +51,9 @@ func IsIntegerType(colType string) bool {
if idx := strings.Index(normalized, "("); idx != -1 {
normalized = normalized[:idx]
}
normalized = pgsql.NormalizePGType(normalized)
switch normalized {
case "int", "int2", "int4", "int8", "integer", "smallint", "bigint",
"serial", "smallserial", "bigserial":
case "integer", "smallint", "bigint", "serial", "smallserial", "bigserial":
return true
default:
return false