diff --git a/cmd/gpuaudit/main.go b/cmd/gpuaudit/main.go
index 9232ad9..61b3d8a 100644
--- a/cmd/gpuaudit/main.go
+++ b/cmd/gpuaudit/main.go
@@ -84,7 +84,7 @@ var scanCmd = &cobra.Command{
 func init() {
 	scanCmd.Flags().StringVar(&scanProfile, "profile", "", "AWS profile to use")
 	scanCmd.Flags().StringSliceVar(&scanRegions, "region", nil, "AWS regions to scan (default: common GPU regions)")
-	scanCmd.Flags().StringVar(&scanFormat, "format", "table", "Output format: table, json, markdown, slack")
+	scanCmd.Flags().StringVar(&scanFormat, "format", "table", "Output format: table, json, markdown, slack, csv")
 	scanCmd.Flags().StringVarP(&scanOutput, "output", "o", "", "Write output to file instead of stdout")
 	scanCmd.Flags().BoolVar(&scanSkipMetrics, "skip-metrics", false, "Skip CloudWatch metrics collection (faster but less accurate)")
 	scanCmd.Flags().BoolVar(&scanSkipSageMaker, "skip-sagemaker", false, "Skip SageMaker endpoint scanning")
@@ -191,6 +191,8 @@ func runScan(cmd *cobra.Command, args []string) error {
 		output.FormatMarkdown(w, result)
 	case "slack":
 		return output.FormatSlack(w, result)
+	case "csv":
+		return output.FormatCSV(w, result)
 	default:
 		output.FormatTable(w, result)
 	}
diff --git a/go.mod b/go.mod
index 9b28a73..6a4732e 100644
--- a/go.mod
+++ b/go.mod
@@ -13,6 +13,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/organizations v1.51.2
 	github.com/aws/aws-sdk-go-v2/service/sagemaker v1.238.0
 	github.com/aws/aws-sdk-go-v2/service/sts v1.41.10
+	github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1
 	github.com/prometheus/client_model v0.6.2
 	github.com/prometheus/common v0.67.5
 	github.com/spf13/cobra v1.10.2
diff --git a/go.sum b/go.sum
index 67088e7..ef7d4b0 100644
--- a/go.sum
+++ b/go.sum
@@ -60,6 +60,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
 github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
 github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 h1:FWNFq4fM1wPfcK40yHE5UO3RUdSNPaBC+j3PokzA6OQ=
+github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
diff --git a/internal/models/models.go b/internal/models/models.go
index 153ecec..07b8c6e 100644
--- a/internal/models/models.go
+++ b/internal/models/models.go
@@ -50,116 +50,116 @@ const (
 // GPUInstance represents a discovered GPU resource with its metrics and cost data.
 type GPUInstance struct {
 	// Identity
-	InstanceID string            `json:"instance_id"`
-	Source     Source            `json:"source"`
-	AccountID  string            `json:"account_id"`
-	Region     string            `json:"region"`
-	Name       string            `json:"name"` // from Name tag or endpoint name
+	InstanceID string            `json:"instance_id" csv:"instance_id"`
+	Source     Source            `json:"source" csv:"source"`
+	AccountID  string            `json:"account_id" csv:"account_id"`
+	Region     string            `json:"region" csv:"region"`
+	Name       string            `json:"name" csv:"name"` // from Name tag or endpoint name
 	Tags       map[string]string `json:"tags,omitempty"`
 
 	// Network (populated for EC2)
-	PrivateDnsName string `json:"private_dns_name,omitempty"`
+	PrivateDnsName string `json:"private_dns_name,omitempty" csv:"private_dns_name,omitempty"`
 
 	// GPU hardware
-	InstanceType string  `json:"instance_type"`
-	GPUModel     string  `json:"gpu_model"`
-	GPUCount     int     `json:"gpu_count"`
-	GPUVRAMGiB   float64 `json:"gpu_vram_gib"`
-	TotalVRAMGiB float64 `json:"total_vram_gib"`
+	InstanceType string  `json:"instance_type" csv:"instance_type"`
+	GPUModel     string  `json:"gpu_model" csv:"gpu_model"`
+	GPUCount     int     `json:"gpu_count" csv:"gpu_count"`
+	GPUVRAMGiB   float64 `json:"gpu_vram_gib" csv:"gpu_vram_gib"`
+	TotalVRAMGiB float64 `json:"total_vram_gib" csv:"total_vram_gib"`
 
 	// Kubernetes (populated for k8s-node source)
-	ClusterName  string `json:"cluster_name,omitempty"`
-	K8sNodeName  string `json:"k8s_node_name,omitempty"`
-	GPUAllocated int    `json:"gpu_allocated,omitempty"`
+	ClusterName  string `json:"cluster_name,omitempty" csv:"cluster_name,omitempty"`
+	K8sNodeName  string `json:"k8s_node_name,omitempty" csv:"k8s_node_name,omitempty"`
+	GPUAllocated int    `json:"gpu_allocated,omitempty" csv:"gpu_allocated,omitempty"`
 
 	// State
-	State       string    `json:"state"`
-	LaunchTime  time.Time `json:"launch_time"`
-	UptimeHours float64   `json:"uptime_hours"`
+	State       string    `json:"state" csv:"state"`
+	LaunchTime  time.Time `json:"launch_time" csv:"launch_time"`
+	UptimeHours float64   `json:"uptime_hours" csv:"uptime_hours"`
 
 	// Metrics (nil means unavailable)
-	AvgCPUPercent        *float64 `json:"avg_cpu_percent,omitempty"`
-	MaxCPUPercent        *float64 `json:"max_cpu_percent,omitempty"`
-	AvgNetworkInBytes    *float64 `json:"avg_network_in_bytes,omitempty"`
-	AvgNetworkOutBytes   *float64 `json:"avg_network_out_bytes,omitempty"`
-	AvgDiskReadOps       *float64 `json:"avg_disk_read_ops,omitempty"`
-	AvgDiskWriteOps      *float64 `json:"avg_disk_write_ops,omitempty"`
-	AvgGPUUtilization    *float64 `json:"avg_gpu_utilization,omitempty"`
-	AvgGPUMemUtilization *float64 `json:"avg_gpu_mem_utilization,omitempty"`
-	InvocationCount      *int64   `json:"invocation_count,omitempty"`
+	AvgCPUPercent        *float64 `json:"avg_cpu_percent,omitempty" csv:"avg_cpu_percent,omitempty"`
+	MaxCPUPercent        *float64 `json:"max_cpu_percent,omitempty" csv:"max_cpu_percent,omitempty"`
+	AvgNetworkInBytes    *float64 `json:"avg_network_in_bytes,omitempty" csv:"avg_network_in_bytes,omitempty"`
+	AvgNetworkOutBytes   *float64 `json:"avg_network_out_bytes,omitempty" csv:"avg_network_out_bytes,omitempty"`
+	AvgDiskReadOps       *float64 `json:"avg_disk_read_ops,omitempty" csv:"avg_disk_read_ops,omitempty"`
+	AvgDiskWriteOps      *float64 `json:"avg_disk_write_ops,omitempty" csv:"avg_disk_write_ops,omitempty"`
+	AvgGPUUtilization    *float64 `json:"avg_gpu_utilization,omitempty" csv:"avg_gpu_utilization,omitempty"`
+	AvgGPUMemUtilization *float64 `json:"avg_gpu_mem_utilization,omitempty" csv:"avg_gpu_mem_utilization,omitempty"`
+	InvocationCount      *int64   `json:"invocation_count,omitempty" csv:"invocation_count,omitempty"`
 
 	// Cost
-	PricingModel   string   `json:"pricing_model"` // on-demand, spot, reserved, savings-plan
-	HourlyCost     float64  `json:"hourly_cost"`
-	MonthlyCost    float64  `json:"monthly_cost"`
-	SpotHourlyCost *float64 `json:"spot_hourly_cost,omitempty"`
-	MTDCost        *float64 `json:"mtd_cost,omitempty"`
+	PricingModel   string   `json:"pricing_model" csv:"pricing_model"` // on-demand, spot, reserved, savings-plan
+	HourlyCost     float64  `json:"hourly_cost" csv:"hourly_cost"`
+	MonthlyCost    float64  `json:"monthly_cost" csv:"monthly_cost"`
+	SpotHourlyCost *float64 `json:"spot_hourly_cost,omitempty" csv:"spot_hourly_cost,omitempty"`
+	MTDCost        *float64 `json:"mtd_cost,omitempty" csv:"mtd_cost,omitempty"`
 
 	// Analysis results (populated by analysis engine)
 	WasteSignals     []WasteSignal    `json:"waste_signals,omitempty"`
 	Recommendations  []Recommendation `json:"recommendations,omitempty"`
-	EstimatedSavings float64          `json:"estimated_savings"`
+	EstimatedSavings float64          `json:"estimated_savings" csv:"estimated_savings"`
 }
 
 // WasteSignal represents a detected waste indicator on a GPU instance.
 type WasteSignal struct {
-	Type       string   `json:"type"` // idle, low_utilization, oversized_gpu, pricing_mismatch, stale, low_invocations, spot_eligible
-	Severity   Severity `json:"severity"`
-	Confidence float64  `json:"confidence"` // 0.0 - 1.0
-	Evidence   string   `json:"evidence"`
+	Type       string   `json:"type" csv:"type"` // idle, low_utilization, oversized_gpu, pricing_mismatch, stale, low_invocations, spot_eligible
+	Severity   Severity `json:"severity" csv:"severity"`
+	Confidence float64  `json:"confidence" csv:"confidence"` // 0.0 - 1.0
+	Evidence   string   `json:"evidence" csv:"evidence"`
 }
 
 // Recommendation is a specific action the user can take to reduce cost.
 type Recommendation struct {
-	Action                 Action  `json:"action"`
-	Description            string  `json:"description"`
-	CurrentMonthlyCost     float64 `json:"current_monthly_cost"`
-	RecommendedMonthlyCost float64 `json:"recommended_monthly_cost"`
-	MonthlySavings         float64 `json:"monthly_savings"`
-	SavingsPercent         float64 `json:"savings_percent"`
-	Risk                   Risk    `json:"risk"`
+	Action                 Action  `json:"action" csv:"action"`
+	Description            string  `json:"description" csv:"description"`
+	CurrentMonthlyCost     float64 `json:"current_monthly_cost" csv:"current_monthly_cost"`
+	RecommendedMonthlyCost float64 `json:"recommended_monthly_cost" csv:"recommended_monthly_cost"`
+	MonthlySavings         float64 `json:"monthly_savings" csv:"monthly_savings"`
+	SavingsPercent         float64 `json:"savings_percent" csv:"savings_percent"`
+	Risk                   Risk    `json:"risk" csv:"risk"`
 }
 
 // ScanResult holds the complete output of a gpuaudit scan.
 type ScanResult struct {
-	Timestamp       time.Time         `json:"timestamp"`
-	AccountID       string            `json:"account_id"`
-	Targets         []string          `json:"targets,omitempty"`
-	Regions         []string          `json:"regions"`
-	ScanDuration    string            `json:"scan_duration"`
-	Instances       []GPUInstance     `json:"instances"`
-	Summary         ScanSummary       `json:"summary"`
-	TargetSummaries []TargetSummary   `json:"target_summaries,omitempty"`
-	TargetErrors    []TargetErrorInfo `json:"target_errors,omitempty"`
+	Timestamp       time.Time         `json:"timestamp" csv:"timestamp"`
+	AccountID       string            `json:"account_id" csv:"account_id"`
+	Targets         []string          `json:"targets,omitempty" csv:"targets,omitempty"`
+	Regions         []string          `json:"regions" csv:"regions"`
+	ScanDuration    string            `json:"scan_duration" csv:"scan_duration"`
+	Instances       []GPUInstance     `json:"instances" csv:"instances"`
+	Summary         ScanSummary       `json:"summary" csv:"summary"`
+	TargetSummaries []TargetSummary   `json:"target_summaries,omitempty" csv:"target_summaries,omitempty"`
+	TargetErrors    []TargetErrorInfo `json:"target_errors,omitempty" csv:"target_errors,omitempty"`
 }
 
 // ScanSummary provides aggregate statistics for a scan.
 type ScanSummary struct {
-	TotalInstances      int     `json:"total_instances"`
-	TotalMonthlyCost    float64 `json:"total_monthly_cost"`
-	TotalEstimatedWaste float64 `json:"total_estimated_waste"`
-	WastePercent        float64 `json:"waste_percent"`
-	CriticalCount       int     `json:"critical_count"`
-	WarningCount        int     `json:"warning_count"`
-	InfoCount           int     `json:"info_count"`
-	HealthyCount        int     `json:"healthy_count"`
+	TotalInstances      int     `json:"total_instances" csv:"total_instances"`
+	TotalMonthlyCost    float64 `json:"total_monthly_cost" csv:"total_monthly_cost"`
+	TotalEstimatedWaste float64 `json:"total_estimated_waste" csv:"total_estimated_waste"`
+	WastePercent        float64 `json:"waste_percent" csv:"waste_percent"`
+	CriticalCount       int     `json:"critical_count" csv:"critical_count"`
+	WarningCount        int     `json:"warning_count" csv:"warning_count"`
+	InfoCount           int     `json:"info_count" csv:"info_count"`
+	HealthyCount        int     `json:"healthy_count" csv:"healthy_count"`
 }
 
 // TargetSummary provides per-target aggregate statistics.
 type TargetSummary struct {
-	Target              string  `json:"target"`
-	TotalInstances      int     `json:"total_instances"`
-	TotalMonthlyCost    float64 `json:"total_monthly_cost"`
-	TotalEstimatedWaste float64 `json:"total_estimated_waste"`
-	WastePercent        float64 `json:"waste_percent"`
-	CriticalCount       int     `json:"critical_count"`
-	WarningCount        int     `json:"warning_count"`
+	Target              string  `json:"target" csv:"target"`
+	TotalInstances      int     `json:"total_instances" csv:"total_instances"`
+	TotalMonthlyCost    float64 `json:"total_monthly_cost" csv:"total_monthly_cost"`
+	TotalEstimatedWaste float64 `json:"total_estimated_waste" csv:"total_estimated_waste"`
+	WastePercent        float64 `json:"waste_percent" csv:"waste_percent"`
+	CriticalCount       int     `json:"critical_count" csv:"critical_count"`
+	WarningCount        int     `json:"warning_count" csv:"warning_count"`
 }
 
 // TargetErrorInfo describes a target that failed to scan.
 type TargetErrorInfo struct {
-	Target string `json:"target"`
-	Error  string `json:"error"`
+	Target string `json:"target" csv:"target"`
+	Error  string `json:"error" csv:"error"`
 }
 
 // MaxSeverity returns the highest severity among the given waste signals.
diff --git a/internal/output/csv.go b/internal/output/csv.go
new file mode 100644
index 0000000..7cec24c
--- /dev/null
+++ b/internal/output/csv.go
@@ -0,0 +1,24 @@
+// Copyright 2026 the gpuaudit authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package output
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/gocarina/gocsv"
+	"github.com/gpuaudit/cli/internal/models"
+)
+
+// FormatCSV marshals the scan instances as CSV and writes them to w.
+//
+// Only result.Instances is handed to gocsv.Marshal; the remaining ScanResult
+// fields are not part of the output. An encoder failure is returned wrapped
+// with the "encoding CSV" prefix.
+func FormatCSV(w io.Writer, result *models.ScanResult) error {
+	if err := gocsv.Marshal(result.Instances, w); err != nil {
+		return fmt.Errorf("encoding CSV: %w", err)
+	}
+	return nil
+}
diff --git a/internal/output/csv_test.go b/internal/output/csv_test.go
new file mode 100644
index 0000000..1f9876f
--- /dev/null
+++ b/internal/output/csv_test.go
@@ -0,0 +1,221 @@
+// Copyright 2026 the gpuaudit authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package output
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/gpuaudit/cli/internal/models"
+)
+
+// TestFormatCSV_SingleInstance checks the header columns and data values emitted for one instance.
+func TestFormatCSV_SingleInstance(t *testing.T) {
+	result := &models.ScanResult{
+		Timestamp:    time.Date(2026, 4, 21, 12, 0, 0, 0, time.UTC),
+		AccountID:    "123456789012",
+		Regions:      []string{"us-east-1"},
+		ScanDuration: "5s",
+		Instances: []models.GPUInstance{
+			{
+				InstanceID:   "i-abc123",
+				Source:       models.SourceEC2,
+				AccountID:    "123456789012",
+				Region:       "us-east-1",
+				Name:         "ml-training-1",
+				InstanceType: "p4d.24xlarge",
+				GPUModel:     "A100",
+				GPUCount:     8,
+				GPUVRAMGiB:   40,
+				TotalVRAMGiB: 320,
+				State:        "running",
+				LaunchTime:   time.Date(2026, 4, 20, 0, 0, 0, 0, time.UTC),
+				UptimeHours:  36,
+				PricingModel: "on-demand",
+				HourlyCost:   32.77,
+				MonthlyCost:  23922.10,
+			},
+		},
+		Summary: models.ScanSummary{
+			TotalInstances:   1,
+			TotalMonthlyCost: 23922.10,
+		},
+	}
+
+	var buf bytes.Buffer
+	if err := FormatCSV(&buf, result); err != nil {
+		t.Fatalf("FormatCSV() error: %v", err)
+	}
+
+	out := buf.String()
+	if out == "" {
+		t.Fatal("FormatCSV() produced empty output")
+	}
+
+	// Header row should contain CSV column names from struct tags.
+	lines := strings.Split(strings.TrimSpace(out), "\n")
+	if len(lines) < 2 {
+		t.Fatalf("expected at least header + 1 data row, got %d lines", len(lines))
+	}
+
+	header := lines[0]
+	for _, col := range []string{"instance_id", "source", "region", "instance_type", "gpu_model", "gpu_count", "hourly_cost", "monthly_cost"} {
+		if !strings.Contains(header, col) {
+			t.Errorf("header missing column %q", col)
+		}
+	}
+
+	// Data row should contain instance values.
+	data := lines[1]
+	for _, val := range []string{"i-abc123", "ec2", "us-east-1", "p4d.24xlarge", "A100", "on-demand"} {
+		if !strings.Contains(data, val) {
+			t.Errorf("data row missing value %q", val)
+		}
+	}
+}
+
+// TestFormatCSV_MultipleInstances checks that two instances yield a header plus two data rows.
+func TestFormatCSV_MultipleInstances(t *testing.T) {
+	result := &models.ScanResult{
+		Timestamp:    time.Date(2026, 4, 21, 12, 0, 0, 0, time.UTC),
+		AccountID:    "123456789012",
+		Regions:      []string{"us-east-1"},
+		ScanDuration: "3s",
+		Instances: []models.GPUInstance{
+			{
+				InstanceID:   "i-aaa",
+				Source:       models.SourceEC2,
+				Region:       "us-east-1",
+				InstanceType: "g5.xlarge",
+				GPUModel:     "A10G",
+				GPUCount:     1,
+				State:        "running",
+				LaunchTime:   time.Date(2026, 4, 20, 0, 0, 0, 0, time.UTC),
+				PricingModel: "on-demand",
+				HourlyCost:   1.01,
+				MonthlyCost:  737.30,
+			},
+			{
+				InstanceID:   "i-bbb",
+				Source:       models.SourceK8sNode,
+				Region:       "eu-west-1",
+				InstanceType: "p4d.24xlarge",
+				GPUModel:     "A100",
+				GPUCount:     8,
+				State:        "running",
+				LaunchTime:   time.Date(2026, 4, 19, 0, 0, 0, 0, time.UTC),
+				PricingModel: "on-demand",
+				HourlyCost:   32.77,
+				MonthlyCost:  23922.10,
+			},
+		},
+		Summary: models.ScanSummary{
+			TotalInstances:   2,
+			TotalMonthlyCost: 24659.40,
+		},
+	}
+
+	var buf bytes.Buffer
+	if err := FormatCSV(&buf, result); err != nil {
+		t.Fatalf("FormatCSV() error: %v", err)
+	}
+
+	lines := strings.Split(strings.TrimSpace(buf.String()), "\n")
+	// Header + 2 data rows.
+	if len(lines) != 3 {
+		t.Fatalf("expected 3 lines (header + 2 rows), got %d", len(lines))
+	}
+}
+
+// TestFormatCSV_NilMetricsOmitted checks that nil metric pointers neither fail nor yield empty output.
+func TestFormatCSV_NilMetricsOmitted(t *testing.T) {
+	result := &models.ScanResult{
+		Timestamp:    time.Date(2026, 4, 21, 12, 0, 0, 0, time.UTC),
+		ScanDuration: "1s",
+		Instances: []models.GPUInstance{
+			{
+				InstanceID:        "i-nil-metrics",
+				Source:            models.SourceEC2,
+				InstanceType:      "g5.xlarge",
+				GPUModel:          "A10G",
+				GPUCount:          1,
+				State:             "running",
+				LaunchTime:        time.Date(2026, 4, 20, 0, 0, 0, 0, time.UTC),
+				PricingModel:      "on-demand",
+				AvgGPUUtilization: nil,
+				AvgCPUPercent:     nil,
+			},
+		},
+		Summary: models.ScanSummary{TotalInstances: 1},
+	}
+
+	var buf bytes.Buffer
+	if err := FormatCSV(&buf, result); err != nil {
+		t.Fatalf("FormatCSV() error: %v", err)
+	}
+
+	// Should not error on nil pointer fields.
+	if buf.Len() == 0 {
+		t.Fatal("FormatCSV() produced empty output for nil metrics")
+	}
+}
+
+// TestFormatCSV_WithMetrics checks that populated metric pointers appear as values in the output.
+func TestFormatCSV_WithMetrics(t *testing.T) {
+	gpuUtil := 85.5
+	cpuPct := 42.0
+
+	result := &models.ScanResult{
+		Timestamp:    time.Date(2026, 4, 21, 12, 0, 0, 0, time.UTC),
+		ScanDuration: "1s",
+		Instances: []models.GPUInstance{
+			{
+				InstanceID:        "i-with-metrics",
+				Source:            models.SourceEC2,
+				InstanceType:      "p4d.24xlarge",
+				GPUModel:          "A100",
+				GPUCount:          8,
+				State:             "running",
+				LaunchTime:        time.Date(2026, 4, 20, 0, 0, 0, 0, time.UTC),
+				PricingModel:      "on-demand",
+				HourlyCost:        32.77,
+				MonthlyCost:       23922.10,
+				AvgGPUUtilization: &gpuUtil,
+				AvgCPUPercent:     &cpuPct,
+			},
+		},
+		Summary: models.ScanSummary{TotalInstances: 1},
+	}
+
+	var buf bytes.Buffer
+	if err := FormatCSV(&buf, result); err != nil {
+		t.Fatalf("FormatCSV() error: %v", err)
+	}
+
+	out := buf.String()
+	if !strings.Contains(out, "85.5") {
+		t.Error("expected GPU utilization 85.5 in output")
+	}
+	if !strings.Contains(out, "42") {
+		t.Error("expected CPU percent 42 in output")
+	}
+}
+
+// TestFormatCSV_EmptyInstances checks only that an empty instance slice does not panic.
+func TestFormatCSV_EmptyInstances(t *testing.T) {
+	result := &models.ScanResult{
+		Timestamp:    time.Date(2026, 4, 21, 12, 0, 0, 0, time.UTC),
+		ScanDuration: "0s",
+		Instances:    []models.GPUInstance{},
+		Summary:      models.ScanSummary{},
+	}
+
+	var buf bytes.Buffer
+	err := FormatCSV(&buf, result)
+	// Empty slice may produce header-only or error — either is valid behavior.
+	// Just verify no panic.
+	_ = err
+}