Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/gpuaudit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ var iamPolicyCmd = &cobra.Command{
"ec2:DescribeInstances",
"ec2:DescribeInstanceTypes",
"ec2:DescribeRegions",
"ec2:DescribeSpotPriceHistory",
},
"Resource": "*",
},
Expand Down
48 changes: 48 additions & 0 deletions internal/analysis/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func analyzeInstance(inst *models.GPUInstance) {
ruleSageMakerLowUtil,
ruleSageMakerOversized,
ruleK8sUnallocatedGPU,
ruleSpotEligible,
}
for _, rule := range rules {
rule(inst)
Expand Down Expand Up @@ -347,3 +348,50 @@ func ruleK8sUnallocatedGPU(inst *models.GPUInstance) {
})
}
}

// Rule 8: On-demand instance eligible for Spot pricing.
//
// ruleSpotEligible flags an instance that is billed on-demand, has been up
// for at least 24 hours, and has a known, positive spot price below its
// on-demand hourly cost. When all of that holds it appends one informational
// "spot_eligible" waste signal and one change-pricing recommendation to inst;
// otherwise inst is left untouched.
func ruleSpotEligible(inst *models.GPUInstance) {
	// Hours-per-month factor used for both the savings and the projected
	// spot cost, so the two figures stay comparable.
	const hoursPerMonth = 730

	if inst.PricingModel != "on-demand" {
		return
	}
	if inst.UptimeHours < 24 {
		return
	}
	if inst.SpotHourlyCost == nil || inst.HourlyCost <= 0 {
		return
	}

	spotHourly := *inst.SpotHourlyCost
	// A zero or negative spot price is not a usable quote. Without this guard
	// it would be reported as >=100% savings with a non-positive monthly cost.
	if spotHourly <= 0 {
		return
	}

	hourlyDelta := inst.HourlyCost - spotHourly
	savingsPercent := (hourlyDelta / inst.HourlyCost) * 100
	if savingsPercent <= 0 {
		// Spot is not cheaper than on-demand; nothing to recommend.
		return
	}

	monthlySavings := hourlyDelta * hoursPerMonth
	spotMonthlyCost := spotHourly * hoursPerMonth

	// Higher savings → higher confidence, capped at 0.95.
	confidence := 0.35 + (savingsPercent / 120)
	if confidence > 0.95 {
		confidence = 0.95
	}

	inst.WasteSignals = append(inst.WasteSignals, models.WasteSignal{
		Type:       "spot_eligible",
		Severity:   models.SeverityInfo,
		Confidence: confidence,
		Evidence:   fmt.Sprintf("Spot pricing available at $%.3f/hr vs $%.3f/hr on-demand (%.0f%% savings).", spotHourly, inst.HourlyCost, savingsPercent),
	})
	inst.Recommendations = append(inst.Recommendations, models.Recommendation{
		Action:                 models.ActionChangePricing,
		Description:            fmt.Sprintf("Spot pricing available at $%.2f/hr (%.0f%% savings). Spot instances may be interrupted — suitable for fault-tolerant workloads.", spotHourly, savingsPercent),
		CurrentMonthlyCost:     inst.MonthlyCost,
		RecommendedMonthlyCost: spotMonthlyCost,
		MonthlySavings:         monthlySavings,
		SavingsPercent:         savingsPercent,
		Risk:                   models.RiskHigh,
	})
}
114 changes: 114 additions & 0 deletions internal/analysis/rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,117 @@ func TestAnalyzeAll_ComputesSavings(t *testing.T) {
t.Errorf("expected no signals for healthy instance, got %d", len(instances[1].WasteSignals))
}
}

// TestRuleSpotEligible_FlagsOnDemandWithSpotPrice checks the happy path: an
// on-demand instance that has been up for two days and has a cheaper spot
// price gets exactly one spot_eligible signal and one change-pricing
// recommendation whose monthly savings match the hourly price gap.
func TestRuleSpotEligible_FlagsOnDemandWithSpotPrice(t *testing.T) {
	const (
		onDemandHourly = 1.006
		spotHourly     = 0.556
	)
	spot := spotHourly
	inst := models.GPUInstance{
		InstanceID:     "i-test",
		Source:         models.SourceEC2,
		PricingModel:   "on-demand",
		UptimeHours:    48,
		HourlyCost:     onDemandHourly,
		MonthlyCost:    onDemandHourly * 730,
		SpotHourlyCost: &spot,
	}

	ruleSpotEligible(&inst)

	// Waste signal assertions.
	if got := len(inst.WasteSignals); got != 1 {
		t.Fatalf("expected 1 signal, got %d", got)
	}
	signal := inst.WasteSignals[0]
	if signal.Type != "spot_eligible" {
		t.Errorf("expected spot_eligible, got %s", signal.Type)
	}
	if signal.Severity != models.SeverityInfo {
		t.Errorf("expected info severity, got %s", signal.Severity)
	}

	// Recommendation assertions.
	if got := len(inst.Recommendations); got != 1 {
		t.Fatalf("expected 1 recommendation, got %d", got)
	}
	rec := inst.Recommendations[0]
	if rec.Action != models.ActionChangePricing {
		t.Errorf("expected change_pricing, got %s", rec.Action)
	}

	// Compare with a one-cent tolerance to absorb floating-point rounding.
	want := (onDemandHourly - spotHourly) * 730
	if delta := rec.MonthlySavings - want; delta > 0.01 || delta < -0.01 {
		t.Errorf("expected savings %.2f, got %.2f", want, rec.MonthlySavings)
	}
}

// TestRuleSpotEligible_SkipsSpotInstances verifies that an instance already
// billed as spot is not flagged.
//
// HourlyCost is set above the spot price so that the pricing-model guard is
// the only reason the rule can skip this instance; with a zero HourlyCost the
// rule's cost guard would hide a missing pricing-model check.
func TestRuleSpotEligible_SkipsSpotInstances(t *testing.T) {
	spotPrice := 0.556
	inst := models.GPUInstance{
		PricingModel:   "spot",
		UptimeHours:    48,
		HourlyCost:     1.006,
		MonthlyCost:    1.006 * 730,
		SpotHourlyCost: &spotPrice,
	}

	ruleSpotEligible(&inst)

	if len(inst.WasteSignals) != 0 {
		t.Errorf("expected no signals for spot instance, got %d", len(inst.WasteSignals))
	}
	if len(inst.Recommendations) != 0 {
		t.Errorf("expected no recommendations for spot instance, got %d", len(inst.Recommendations))
	}
}

// TestRuleSpotEligible_SkipsRecentInstances verifies that an on-demand
// instance with less than 24 hours of uptime is not flagged.
//
// HourlyCost is set above the spot price so that the uptime guard is the only
// reason the rule can skip this instance; with a zero HourlyCost the rule's
// cost guard would hide a missing uptime check.
func TestRuleSpotEligible_SkipsRecentInstances(t *testing.T) {
	spotPrice := 0.556
	inst := models.GPUInstance{
		PricingModel:   "on-demand",
		UptimeHours:    12,
		HourlyCost:     1.006,
		MonthlyCost:    1.006 * 730,
		SpotHourlyCost: &spotPrice,
	}

	ruleSpotEligible(&inst)

	if len(inst.WasteSignals) != 0 {
		t.Errorf("expected no signals for recent instance, got %d", len(inst.WasteSignals))
	}
	if len(inst.Recommendations) != 0 {
		t.Errorf("expected no recommendations for recent instance, got %d", len(inst.Recommendations))
	}
}

// TestRuleSpotEligible_SkipsWhenNoSpotPrice verifies that an on-demand
// instance with no known spot price is not flagged.
//
// HourlyCost is set to a positive value so that the nil-spot-price guard is
// the only reason the rule can skip this instance; with a zero HourlyCost the
// rule's cost guard would hide a missing nil check.
func TestRuleSpotEligible_SkipsWhenNoSpotPrice(t *testing.T) {
	inst := models.GPUInstance{
		PricingModel:   "on-demand",
		UptimeHours:    48,
		HourlyCost:     1.006,
		MonthlyCost:    1.006 * 730,
		SpotHourlyCost: nil,
	}

	ruleSpotEligible(&inst)

	if len(inst.WasteSignals) != 0 {
		t.Errorf("expected no signals when spot price unavailable, got %d", len(inst.WasteSignals))
	}
	if len(inst.Recommendations) != 0 {
		t.Errorf("expected no recommendations when spot price unavailable, got %d", len(inst.Recommendations))
	}
}

// TestRuleSpotEligible_ConfidenceScalesWithSavings runs the rule against
// several on-demand/spot price pairs and checks that the reported confidence
// is at least the floor expected for each savings level.
func TestRuleSpotEligible_ConfidenceScalesWithSavings(t *testing.T) {
	type scenario struct {
		label   string
		hourly  float64 // on-demand hourly cost
		spot    float64 // spot hourly cost
		atLeast float64 // lowest acceptable confidence
	}
	scenarios := []scenario{
		{label: "large_savings_60pct", hourly: 1.0, spot: 0.4, atLeast: 0.85},
		{label: "moderate_savings_40pct", hourly: 1.0, spot: 0.6, atLeast: 0.65},
		{label: "small_savings_20pct", hourly: 1.0, spot: 0.8, atLeast: 0.5},
	}

	for i := range scenarios {
		sc := &scenarios[i]
		t.Run(sc.label, func(t *testing.T) {
			inst := models.GPUInstance{
				PricingModel:   "on-demand",
				UptimeHours:    48,
				HourlyCost:     sc.hourly,
				MonthlyCost:    sc.hourly * 730,
				SpotHourlyCost: &sc.spot,
			}

			ruleSpotEligible(&inst)

			if len(inst.WasteSignals) == 0 {
				t.Fatal("expected signal")
			}
			got := inst.WasteSignals[0].Confidence
			if got < sc.atLeast {
				t.Errorf("expected confidence >= %.2f, got %.2f", sc.atLeast, got)
			}
		})
	}
}
11 changes: 6 additions & 5 deletions internal/models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,11 @@ type GPUInstance struct {
InvocationCount *int64 `json:"invocation_count,omitempty"`

// Cost
PricingModel string `json:"pricing_model"` // on-demand, spot, reserved, savings-plan
HourlyCost float64 `json:"hourly_cost"`
MonthlyCost float64 `json:"monthly_cost"`
MTDCost *float64 `json:"mtd_cost,omitempty"`
PricingModel string `json:"pricing_model"` // on-demand, spot, reserved, savings-plan
HourlyCost float64 `json:"hourly_cost"`
MonthlyCost float64 `json:"monthly_cost"`
SpotHourlyCost *float64 `json:"spot_hourly_cost,omitempty"`
MTDCost *float64 `json:"mtd_cost,omitempty"`

// Analysis results (populated by analysis engine)
WasteSignals []WasteSignal `json:"waste_signals,omitempty"`
Expand All @@ -98,7 +99,7 @@ type GPUInstance struct {

// WasteSignal represents a detected waste indicator on a GPU instance.
type WasteSignal struct {
Type string `json:"type"` // idle, low_utilization, oversized_gpu, pricing_mismatch, stale, low_invocations
Type string `json:"type"` // idle, low_utilization, oversized_gpu, pricing_mismatch, stale, low_invocations, spot_eligible
Severity Severity `json:"severity"`
Confidence float64 `json:"confidence"` // 0.0 - 1.0
Evidence string `json:"evidence"`
Expand Down
1 change: 1 addition & 0 deletions internal/providers/aws/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ func scanRegion(ctx context.Context, cfg aws.Config, accountID, region string, o
if err := EnrichEC2Metrics(ctx, cwClient, ec2Instances, opts.MetricWindow); err != nil {
fmt.Fprintf(os.Stderr, " warning: could not enrich EC2 metrics in %s: %v\n", region, err)
}
EnrichSpotPrices(ctx, ec2Client, ec2Instances)
}
allInstances = append(allInstances, ec2Instances...)
}
Expand Down
89 changes: 89 additions & 0 deletions internal/providers/aws/spot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright 2026 the gpuaudit authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package aws

import (
"context"
"fmt"
"os"
"strconv"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"

"github.com/gpuaudit/cli/internal/models"
)

// SpotPriceClient captures the one EC2 operation required to look up spot
// prices. EnrichSpotPrices accepts this interface rather than a concrete
// client, so any value providing this method can be passed in.
type SpotPriceClient interface {
	DescribeSpotPriceHistory(
		ctx context.Context,
		input *ec2.DescribeSpotPriceHistoryInput,
		opts ...func(*ec2.Options),
	) (*ec2.DescribeSpotPriceHistoryOutput, error)
}

// EnrichSpotPrices fetches current spot prices for EC2 GPU instances and
// populates SpotHourlyCost on each instance where spot is available.
//
// Only entries whose Source is models.SourceEC2 are considered. For instances
// already running as spot, HourlyCost and MonthlyCost are overwritten with
// the spot price. The lookup is best-effort: an API failure is reported as a
// warning on stderr and whatever prices were collected before the failure
// (possibly none) are applied.
func EnrichSpotPrices(ctx context.Context, client SpotPriceClient, instances []models.GPUInstance) {
	// Upper bound on result pages followed, so a response that keeps handing
	// back a continuation token cannot loop forever.
	const maxPages = 20

	// Collect unique EC2 instance types.
	typeSet := make(map[string]bool)
	for i := range instances {
		if instances[i].Source != models.SourceEC2 {
			continue
		}
		// Skip blank types: presumably the API rejects them, which would
		// discard the prices for every other type in the same request.
		if instances[i].InstanceType == "" {
			continue
		}
		typeSet[instances[i].InstanceType] = true
	}
	if len(typeSet) == 0 {
		return
	}

	instanceTypes := make([]ec2types.InstanceType, 0, len(typeSet))
	for t := range typeSet {
		instanceTypes = append(instanceTypes, ec2types.InstanceType(t))
	}

	input := &ec2.DescribeSpotPriceHistoryInput{
		InstanceTypes:       instanceTypes,
		ProductDescriptions: []string{"Linux/UNIX"},
		StartTime:           aws.Time(time.Now().Add(-1 * time.Hour)),
	}

	// Keep the most recent price per instance type. Entries arrive per
	// (type, AZ); we collapse across AZs — spot prices within a region are
	// typically within a few percent. Recency is decided by each entry's
	// Timestamp rather than by response order, and on equal or missing
	// timestamps the first entry seen wins.
	type quote struct {
		price float64
		at    time.Time
	}
	latest := make(map[string]quote, len(typeSet))

	for page := 0; page < maxPages; page++ {
		out, err := client.DescribeSpotPriceHistory(ctx, input)
		if err != nil {
			fmt.Fprintf(os.Stderr, " warning: could not fetch spot prices: %v\n", err)
			break
		}
		if out == nil {
			break
		}

		for _, sp := range out.SpotPriceHistory {
			price, parseErr := strconv.ParseFloat(aws.ToString(sp.SpotPrice), 64)
			if parseErr != nil {
				// Unparseable price: ignore this entry and let another
				// entry for the same type supply the value.
				continue
			}
			itype := string(sp.InstanceType)
			at := aws.ToTime(sp.Timestamp)
			if prev, seen := latest[itype]; seen && !at.After(prev.at) {
				continue
			}
			latest[itype] = quote{price: price, at: at}
		}

		// A nil or empty token means there are no further pages.
		if aws.ToString(out.NextToken) == "" {
			break
		}
		input.NextToken = out.NextToken
	}

	// Populate SpotHourlyCost on matching instances and correct cost for
	// instances already running as spot.
	for i := range instances {
		inst := &instances[i]
		if inst.Source != models.SourceEC2 {
			continue
		}
		q, ok := latest[inst.InstanceType]
		if !ok {
			continue
		}
		// Fresh variable per iteration so each instance owns its pointer.
		price := q.price
		inst.SpotHourlyCost = &price
		if inst.PricingModel == "spot" {
			inst.HourlyCost = price
			inst.MonthlyCost = price * 730
		}
	}
}
Loading