Skip to content

Commit 5b13734

Browse files
authored
Do not send telemetry data if failure in collection (#1731)
Fix error handling in telemetry and usage reporting Problem: We send empty telemetry data if there is any failure in collection of data. We also do the same for NIM usage reporting. Solution: Don't send telemetry data in case of failure of collection. Also does not send data for NIM usage reporting if there was any failure. Refactored some tests to comply with code coverage.
1 parent 16bdfa8 commit 5b13734

File tree

5 files changed

+157
-50
lines changed

5 files changed

+157
-50
lines changed

internal/framework/events/events_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,5 @@ func TestEventLoop_SwapBatches(t *testing.T) {
3131
g.Expect(eventLoop.currentBatch).To(HaveLen(len(nextBatch)))
3232
g.Expect(eventLoop.currentBatch).To(Equal(nextBatch))
3333
g.Expect(eventLoop.nextBatch).To(BeEmpty())
34-
g.Expect(cap(eventLoop.nextBatch)).To(Equal(3))
34+
g.Expect(eventLoop.nextBatch).To(HaveCap(3))
3535
}

internal/mode/static/telemetry/job_worker.go

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ func CreateTelemetryJobWorker(
2727
data, err := dataCollector.Collect(ctx)
2828
if err != nil {
2929
logger.Error(err, "Failed to collect telemetry data")
30+
return
3031
}
3132

3233
// Export telemetry

internal/mode/static/telemetry/job_worker_test.go

+21-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package telemetry_test
22

33
import (
44
"context"
5+
"errors"
56
"testing"
67
"time"
78

@@ -13,7 +14,7 @@ import (
1314
"github.com./nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes"
1415
)
1516

16-
func TestCreateTelemetryJobWorker(t *testing.T) {
17+
func TestCreateTelemetryJobWorker_Succeeds(t *testing.T) {
1718
g := NewWithT(t)
1819

1920
exporter := &telemetryfakes.FakeExporter{}
@@ -36,3 +37,22 @@ func TestCreateTelemetryJobWorker(t *testing.T) {
3637
_, data := exporter.ExportArgsForCall(0)
3738
g.Expect(data).To(Equal(&expData))
3839
}
40+
41+
func TestCreateTelemetryJobWorker_CollectFails(t *testing.T) {
42+
g := NewWithT(t)
43+
44+
exporter := &telemetryfakes.FakeExporter{}
45+
dataCollector := &telemetryfakes.FakeDataCollector{}
46+
47+
worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector)
48+
49+
expData := telemetry.Data{}
50+
dataCollector.CollectReturns(expData, errors.New("failed to collect cluster information"))
51+
52+
timeout := 10 * time.Second
53+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
54+
defer cancel()
55+
56+
worker(ctx)
57+
g.Expect(exporter.ExportCallCount()).To(Equal(0))
58+
}

internal/mode/static/usage/job_worker.go

+3
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,19 @@ func CreateUsageJobWorker(
2323
nodeCount, err := CollectNodeCount(ctx, k8sClient)
2424
if err != nil {
2525
logger.Error(err, "Failed to collect node count")
26+
return
2627
}
2728

2829
podCount, err := GetTotalNGFPodCount(ctx, k8sClient)
2930
if err != nil {
3031
logger.Error(err, "Failed to collect replica count")
32+
return
3133
}
3234

3335
clusterUID, err := telemetry.CollectClusterID(ctx, k8sClient)
3436
if err != nil {
3537
logger.Error(err, "Failed to collect cluster UID")
38+
return
3639
}
3740

3841
clusterDetails := ClusterDetails{

internal/mode/static/usage/job_worker_test.go

+131-48
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,26 @@ package usage_test
22

33
import (
44
"context"
5+
"errors"
56
"testing"
67
"time"
78

89
. "github.com./onsi/gomega"
910
appsv1 "k8s.io/api/apps/v1"
1011
v1 "k8s.io/api/core/v1"
1112
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/types"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
1215
"sigs.k8s.io/controller-runtime/pkg/client/fake"
1316
"sigs.k8s.io/controller-runtime/pkg/log/zap"
1417

18+
"github.com./nginxinc/nginx-gateway-fabric/internal/framework/events/eventsfakes"
1519
"github.com./nginxinc/nginx-gateway-fabric/internal/mode/static/config"
1620
"github.com./nginxinc/nginx-gateway-fabric/internal/mode/static/usage"
1721
"github.com./nginxinc/nginx-gateway-fabric/internal/mode/static/usage/usagefakes"
1822
)
1923

2024
func TestCreateUsageJobWorker(t *testing.T) {
21-
g := NewWithT(t)
22-
2325
replicas := int32(1)
2426
ngfReplicaSet := &appsv1.ReplicaSet{
2527
ObjectMeta: metav1.ObjectMeta{
@@ -34,64 +36,145 @@ func TestCreateUsageJobWorker(t *testing.T) {
3436
},
3537
}
3638

37-
ngfPod := &v1.Pod{
38-
ObjectMeta: metav1.ObjectMeta{
39-
Namespace: "nginx-gateway",
40-
Name: "ngf-pod",
41-
OwnerReferences: []metav1.OwnerReference{
42-
{
43-
Kind: "ReplicaSet",
44-
Name: "ngf-replicaset",
39+
tests := []struct {
40+
name string
41+
listCalls func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error
42+
getCalls func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error
43+
expData usage.ClusterDetails
44+
expErr bool
45+
}{
46+
{
47+
name: "succeeds",
48+
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
49+
switch typedList := object.(type) {
50+
case *v1.NodeList:
51+
typedList.Items = append(typedList.Items, v1.Node{})
52+
return nil
53+
case *appsv1.ReplicaSetList:
54+
typedList.Items = append(typedList.Items, *ngfReplicaSet)
55+
return nil
56+
}
57+
return nil
58+
},
59+
getCalls: func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error {
60+
switch typedObject := object.(type) {
61+
case *v1.Namespace:
62+
typedObject.Name = metav1.NamespaceSystem
63+
typedObject.UID = "1234abcd"
64+
return nil
65+
}
66+
return nil
67+
},
68+
expData: usage.ClusterDetails{
69+
Metadata: usage.Metadata{
70+
UID: "1234abcd",
71+
DisplayName: "my-cluster",
72+
},
73+
NodeCount: 1,
74+
PodDetails: usage.PodDetails{
75+
CurrentPodCounts: usage.CurrentPodsCount{
76+
PodCount: 1,
77+
},
4578
},
4679
},
80+
expErr: false,
4781
},
48-
}
49-
50-
kubeSystem := &v1.Namespace{
51-
ObjectMeta: metav1.ObjectMeta{
52-
Name: metav1.NamespaceSystem,
53-
UID: "1234abcd",
54-
},
55-
}
56-
57-
k8sClient := fake.NewFakeClient(&v1.Node{}, ngfReplicaSet, ngfPod, kubeSystem)
58-
reporter := &usagefakes.FakeReporter{}
59-
60-
worker := usage.CreateUsageJobWorker(
61-
zap.New(),
62-
k8sClient,
63-
reporter,
64-
config.Config{
65-
GatewayPodConfig: config.GatewayPodConfig{
66-
Namespace: "nginx-gateway",
67-
Name: "ngf-pod",
82+
{
83+
name: "collect node count fails",
84+
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
85+
switch object.(type) {
86+
case *v1.NodeList:
87+
return errors.New("failed to collect node list")
88+
}
89+
return nil
6890
},
69-
UsageReportConfig: &config.UsageReportConfig{
70-
ClusterDisplayName: "my-cluster",
91+
getCalls: func(_ context.Context, _ types.NamespacedName, _ client.Object, _ ...client.GetOption) error {
92+
return nil
7193
},
94+
expData: usage.ClusterDetails{},
95+
expErr: true,
7296
},
73-
)
74-
75-
expData := usage.ClusterDetails{
76-
Metadata: usage.Metadata{
77-
UID: "1234abcd",
78-
DisplayName: "my-cluster",
97+
{
98+
name: "collect replica count fails",
99+
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
100+
switch typedList := object.(type) {
101+
case *v1.NodeList:
102+
typedList.Items = append(typedList.Items, v1.Node{})
103+
return nil
104+
case *appsv1.ReplicaSetList:
105+
return errors.New("failed to collect replica set list")
106+
}
107+
return nil
108+
},
109+
getCalls: func(_ context.Context, _ types.NamespacedName, _ client.Object, _ ...client.GetOption) error {
110+
return nil
111+
},
112+
expData: usage.ClusterDetails{},
113+
expErr: true,
79114
},
80-
NodeCount: 1,
81-
PodDetails: usage.PodDetails{
82-
CurrentPodCounts: usage.CurrentPodsCount{
83-
PodCount: 1,
115+
{
116+
name: "collect cluster UID fails",
117+
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
118+
switch typedList := object.(type) {
119+
case *v1.NodeList:
120+
typedList.Items = append(typedList.Items, v1.Node{})
121+
return nil
122+
case *appsv1.ReplicaSetList:
123+
typedList.Items = append(typedList.Items, *ngfReplicaSet)
124+
return nil
125+
}
126+
return nil
127+
},
128+
getCalls: func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error {
129+
switch object.(type) {
130+
case *v1.Namespace:
131+
return errors.New("failed to collect namespace")
132+
}
133+
return nil
84134
},
135+
expData: usage.ClusterDetails{},
136+
expErr: true,
85137
},
86138
}
87139

88-
timeout := 10 * time.Second
89-
ctx, cancel := context.WithTimeout(context.Background(), timeout)
90-
defer cancel()
140+
for _, test := range tests {
141+
t.Run(test.name, func(t *testing.T) {
142+
g := NewWithT(t)
143+
144+
k8sClientReader := &eventsfakes.FakeReader{}
145+
k8sClientReader.ListCalls(test.listCalls)
146+
k8sClientReader.GetCalls(test.getCalls)
91147

92-
worker(ctx)
93-
_, data := reporter.ReportArgsForCall(0)
94-
g.Expect(data).To(Equal(expData))
148+
reporter := &usagefakes.FakeReporter{}
149+
150+
worker := usage.CreateUsageJobWorker(
151+
zap.New(),
152+
k8sClientReader,
153+
reporter,
154+
config.Config{
155+
GatewayPodConfig: config.GatewayPodConfig{
156+
Namespace: "nginx-gateway",
157+
Name: "ngf-pod",
158+
},
159+
UsageReportConfig: &config.UsageReportConfig{
160+
ClusterDisplayName: "my-cluster",
161+
},
162+
},
163+
)
164+
165+
timeout := 10 * time.Second
166+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
167+
defer cancel()
168+
169+
worker(ctx)
170+
if test.expErr {
171+
g.Expect(reporter.ReportCallCount()).To(Equal(0))
172+
} else {
173+
_, data := reporter.ReportArgsForCall(0)
174+
g.Expect(data).To(Equal(test.expData))
175+
}
176+
})
177+
}
95178
}
96179

97180
func TestGetTotalNGFPodCount(t *testing.T) {

0 commit comments

Comments
 (0)