relspecgo/cmd/relspec/split.go

package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/spf13/cobra"

	"git.warky.dev/wdevs/relspecgo/pkg/models"
)

// Flag values for the split command. They are bound to command-line flags in
// init and read by runSplit and filterDatabase.
var (
	// Source selection: --from, --from-path, --from-conn.
	splitSourceType    string
	splitSourcePath    string
	splitSourceConn    string
	// Target selection: --to, --to-path.
	splitTargetType    string
	splitTargetPath    string
	// Include filters as raw comma-separated lists: --schemas, --tables.
	splitSchemas       string
	splitTables        string
	// Output options: --package, --database-name.
	splitPackageName   string
	splitDatabaseName  string
	// Exclude filters as raw comma-separated lists: --exclude-schema, --exclude-tables.
	splitExcludeSchema string
	splitExcludeTables string
)

// splitCmd describes the "split" subcommand: its usage line, short and long
// help text (including the supported input/output formats and examples), and
// the handler, runSplit, that cobra invokes when the command runs.
var splitCmd = &cobra.Command{
	Use:   "split",
	Short: "Split database schemas to extract selected tables into a separate database",
	Long: `Extract selected schemas and tables from a database and write them to a separate output.

The split command allows you to:
- Select specific schemas to include in the output
- Select specific tables within schemas
- Exclude specific schemas or tables if preferred
- Export the selected subset to any supported format

Input formats:
  - dbml: DBML schema files
  - dctx: DCTX schema files
  - drawdb: DrawDB JSON files
  - graphql: GraphQL schema files (.graphql, SDL)
  - json: JSON database schema
  - yaml: YAML database schema
  - gorm: GORM model files (Go, file or directory)
  - bun: Bun model files (Go, file or directory)
  - drizzle: Drizzle ORM schema files (TypeScript, file or directory)
  - prisma: Prisma schema files (.prisma)
  - typeorm: TypeORM entity files (TypeScript)
  - pgsql: PostgreSQL database (live connection)

Output formats:
  - dbml: DBML schema files
  - dctx: DCTX schema files
  - drawdb: DrawDB JSON files
  - graphql: GraphQL schema files (.graphql, SDL)
  - json: JSON database schema
  - yaml: YAML database schema
  - gorm: GORM model files (Go)
  - bun: Bun model files (Go)
  - drizzle: Drizzle ORM schema files (TypeScript)
  - prisma: Prisma schema files (.prisma)
  - typeorm: TypeORM entity files (TypeScript)
  - pgsql: PostgreSQL SQL schema

Examples:
  # Split specific schemas from DBML
  relspec split --from dbml --from-path schema.dbml \
                --schemas public,auth \
                --to json --to-path subset.json

  # Extract specific tables from PostgreSQL
  relspec split --from pgsql \
                --from-conn "postgres://user:pass@localhost:5432/mydb" \
                --schemas public \
                --tables users,orders,products \
                --to dbml --to-path subset.dbml

  # Exclude specific tables
  relspec split --from json --from-path schema.json \
                --exclude-tables "audit_log,system_config,temp_data" \
                --to json --to-path public_schema.json

  # Split and convert to GORM
  relspec split --from json --from-path schema.json \
                --tables "users,posts,comments" \
                --to gorm --to-path models/ --package models \
                --database-name MyAppDB

  # Exclude specific schema and tables
  relspec split --from pgsql \
                --from-conn "postgres://user:pass@localhost/db" \
                --exclude-schema pg_catalog,information_schema \
                --exclude-tables "temp_users,debug_logs" \
                --to json --to-path public_schema.json`,
	RunE: runSplit,
}

// init binds the split command's flags to their package-level variables and
// marks --from, --to and --to-path as required. A failure to mark a flag as
// required is reported on stderr and otherwise ignored.
func init() {
	flags := splitCmd.Flags()

	// Where the schema comes from.
	flags.StringVar(&splitSourceType, "from", "", "Source format (dbml, dctx, drawdb, graphql, json, yaml, gorm, bun, drizzle, prisma, typeorm, pgsql)")
	flags.StringVar(&splitSourcePath, "from-path", "", "Source file path (for file-based formats)")
	flags.StringVar(&splitSourceConn, "from-conn", "", "Source connection string (for database formats)")

	// Where and how the subset is written.
	flags.StringVar(&splitTargetType, "to", "", "Target format (dbml, dctx, drawdb, graphql, json, yaml, gorm, bun, drizzle, prisma, typeorm, pgsql)")
	flags.StringVar(&splitTargetPath, "to-path", "", "Target output path (file or directory)")
	flags.StringVar(&splitPackageName, "package", "", "Package name (for code generation formats like gorm/bun)")
	flags.StringVar(&splitDatabaseName, "database-name", "", "Override database name in output")

	// Which schemas and tables make it into the subset.
	flags.StringVar(&splitSchemas, "schemas", "", "Comma-separated list of schema names to include")
	flags.StringVar(&splitTables, "tables", "", "Comma-separated list of table names to include (case-insensitive)")
	flags.StringVar(&splitExcludeSchema, "exclude-schema", "", "Comma-separated list of schema names to exclude")
	flags.StringVar(&splitExcludeTables, "exclude-tables", "", "Comma-separated list of table names to exclude (case-insensitive)")

	// Each mandatory flag is paired with the diagnostic printed if marking fails.
	mandatory := []struct {
		flag    string
		failure string
	}{
		{"from", "Error marking from flag as required: %v\n"},
		{"to", "Error marking to flag as required: %v\n"},
		{"to-path", "Error marking to-path flag as required: %v\n"},
	}
	for _, m := range mandatory {
		if err := splitCmd.MarkFlagRequired(m.flag); err != nil {
			fmt.Fprintf(os.Stderr, m.failure, err)
		}
	}
}

// runSplit is the handler for the split command. It runs three stages and
// reports progress for each on stderr:
//
//  1. read the source database via readDatabaseForConvert,
//  2. reduce it with filterDatabase (then apply the --database-name override),
//  3. write the result via writeDatabase.
//
// The first stage that fails aborts the run; its error is returned wrapped
// with a message naming the stage. cmd and args are not used.
func runSplit(cmd *cobra.Command, args []string) error {
	// tableCount sums the tables held by every schema of a database.
	tableCount := func(d *models.Database) int {
		n := 0
		for _, s := range d.Schemas {
			n += len(s.Tables)
		}
		return n
	}

	fmt.Fprintf(os.Stderr, "\n=== RelSpec Schema Split ===\n")
	fmt.Fprintf(os.Stderr, "Started at: %s\n\n", getCurrentTimestamp())

	// Stage 1: load the source.
	fmt.Fprintf(os.Stderr, "[1/3] Reading source schema...\n")
	fmt.Fprintf(os.Stderr, "      Format: %s\n", splitSourceType)
	if splitSourcePath != "" {
		fmt.Fprintf(os.Stderr, "      Path:   %s\n", splitSourcePath)
	}
	if splitSourceConn != "" {
		// The connection string is passed through maskPassword before printing.
		fmt.Fprintf(os.Stderr, "      Conn:   %s\n", maskPassword(splitSourceConn))
	}

	source, err := readDatabaseForConvert(splitSourceType, splitSourcePath, splitSourceConn)
	if err != nil {
		return fmt.Errorf("failed to read source: %w", err)
	}

	fmt.Fprintf(os.Stderr, "      ✓ Successfully read database '%s'\n", source.Name)
	fmt.Fprintf(os.Stderr, "      Found: %d schema(s)\n", len(source.Schemas))
	fmt.Fprintf(os.Stderr, "      Found: %d table(s)\n\n", tableCount(source))

	// Stage 2: apply the include/exclude filters.
	fmt.Fprintf(os.Stderr, "[2/3] Filtering schemas and tables...\n")
	subset, err := filterDatabase(source)
	if err != nil {
		return fmt.Errorf("failed to filter database: %w", err)
	}

	if splitDatabaseName != "" {
		subset.Name = splitDatabaseName
	}

	fmt.Fprintf(os.Stderr, "      ✓ Filtered to: %d schema(s), %d table(s)\n\n", len(subset.Schemas), tableCount(subset))

	// Stage 3: emit the subset in the target format.
	fmt.Fprintf(os.Stderr, "[3/3] Writing to target format...\n")
	fmt.Fprintf(os.Stderr, "      Format: %s\n", splitTargetType)
	fmt.Fprintf(os.Stderr, "      Output: %s\n", splitTargetPath)
	if splitPackageName != "" {
		fmt.Fprintf(os.Stderr, "      Package: %s\n", splitPackageName)
	}

	// The final argument is left empty: no schema filter for split.
	if err := writeDatabase(subset, splitTargetType, splitTargetPath, splitPackageName, ""); err != nil {
		return fmt.Errorf("failed to write output: %w", err)
	}

	fmt.Fprintf(os.Stderr, "      ✓ Successfully written to '%s'\n\n", splitTargetPath)
	fmt.Fprintf(os.Stderr, "=== Split Completed Successfully ===\n")
	fmt.Fprintf(os.Stderr, "Completed at: %s\n\n", getCurrentTimestamp())

	return nil
}

// filterDatabase returns a new Database holding the subset of db selected by
// the split flags (--schemas, --tables, --exclude-schema, --exclude-tables).
//
// Schema names are matched exactly; table names are matched case-insensitively.
// Exclusions are checked before inclusions. A schema left with no tables is
// dropped only when a table filter (include or exclude) is in effect. Database
// and schema metadata, plus the table values themselves, are carried over by
// reference rather than deep-copied.
//
// An error is returned when no schema survives the filters.
func filterDatabase(db *models.Database) (*models.Database, error) {
	// lowerSet builds a lookup set keyed by the lowercased names.
	lowerSet := func(names []string) map[string]bool {
		set := make(map[string]bool)
		for _, name := range names {
			set[strings.ToLower(name)] = true
		}
		return set
	}

	wantSchemas := parseCommaSeparated(splitSchemas)
	skipSchemas := parseCommaSeparated(splitExcludeSchema)
	wantTables := lowerSet(parseCommaSeparated(splitTables))
	skipTables := lowerSet(parseCommaSeparated(splitExcludeTables))
	tableFilterActive := len(wantTables) > 0 || len(skipTables) > 0

	result := &models.Database{
		Name:            db.Name,
		Description:     db.Description,
		Comment:         db.Comment,
		DatabaseType:    db.DatabaseType,
		DatabaseVersion: db.DatabaseVersion,
		SourceFormat:    db.SourceFormat,
		UpdatedAt:       db.UpdatedAt,
		GUID:            db.GUID,
		Schemas:         []*models.Schema{},
		Domains:         db.Domains, // domains are passed through unfiltered for now
	}

	for _, src := range db.Schemas {
		switch {
		case contains(skipSchemas, src.Name):
			// Explicitly excluded schema.
			continue
		case len(wantSchemas) > 0 && !contains(wantSchemas, src.Name):
			// An include list exists and this schema is not on it.
			continue
		}

		// Collect the tables of this schema that pass both table filters.
		kept := []*models.Table{}
		for _, tbl := range src.Tables {
			key := strings.ToLower(tbl.Name)
			if skipTables[key] || (len(wantTables) > 0 && !wantTables[key]) {
				continue
			}
			kept = append(kept, tbl)
		}

		// An emptied schema is only retained when no table filter was given.
		if len(kept) == 0 && tableFilterActive {
			continue
		}

		result.Schemas = append(result.Schemas, &models.Schema{
			Name:        src.Name,
			Description: src.Description,
			Owner:       src.Owner,
			Permissions: src.Permissions,
			Comment:     src.Comment,
			Metadata:    src.Metadata,
			Scripts:     src.Scripts,
			Sequence:    src.Sequence,
			Relations:   src.Relations,
			Enums:       src.Enums,
			UpdatedAt:   src.UpdatedAt,
			GUID:        src.GUID,
			Tables:      kept,
			Views:       src.Views,
			Sequences:   src.Sequences,
		})
	}

	if len(result.Schemas) == 0 {
		return nil, fmt.Errorf("no schemas matched the filter criteria")
	}
	return result, nil
}

// parseCommaSeparated splits s on commas and returns the pieces with
// surrounding whitespace removed. Pieces that are empty after trimming are
// skipped. The result is never nil: an empty input yields an empty slice.
func parseCommaSeparated(s string) []string {
	if len(s) == 0 {
		return make([]string, 0)
	}

	fields := strings.Split(s, ",")
	items := make([]string, 0, len(fields))
	for i := range fields {
		if item := strings.TrimSpace(fields[i]); item != "" {
			items = append(items, item)
		}
	}
	return items
}

// contains reports whether item occurs in slice, using exact
// (case-sensitive) string comparison. A nil or empty slice contains nothing.
func contains(slice []string, item string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == item {
			return true
		}
	}
	return false
}