-
Notifications
You must be signed in to change notification settings - Fork 914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
introduce tests to check whether workqueue metrics exist #6003
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,203 @@ | ||||||
/* | ||||||
Copyright 2024 The Karmada Authors. | ||||||
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
you may not use this file except in compliance with the License. | ||||||
You may obtain a copy of the License at | ||||||
|
||||||
http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|
||||||
Unless required by applicable law or agreed to in writing, software | ||||||
distributed under the License is distributed on an "AS IS" BASIS, | ||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
See the License for the specific language governing permissions and | ||||||
limitations under the License. | ||||||
*/ | ||||||
|
||||||
package framework | ||||||
|
||||||
import ( | ||||||
"context" | ||||||
"fmt" | ||||||
"regexp" | ||||||
"time" | ||||||
|
||||||
"github.com/prometheus/common/model" | ||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||
"k8s.io/apimachinery/pkg/util/wait" | ||||||
clientset "k8s.io/client-go/kubernetes" | ||||||
"k8s.io/component-base/metrics/testutil" | ||||||
"k8s.io/klog/v2" | ||||||
|
||||||
"github.com/karmada-io/karmada/pkg/util/names" | ||||||
) | ||||||
|
||||||
const (
	// karmadaNamespace is the namespace the Karmada control-plane components run in.
	karmadaNamespace = "karmada-system"
	// metricsBindPort is the port on which each component exposes its /metrics endpoint.
	metricsBindPort = 8080
	// leaderPodMetric is the metric name used to tell whether a pod is the elected leader.
	leaderPodMetric = "leader_election_master_status"
	// queryTimeout bounds how long we poll a pod for metrics before giving up.
	queryTimeout = 10 * time.Second
)
|
||||||
// following refers to https://github.com/kubernetes/kubernetes/blob/master/test/e2e/framework/metrics/metrics_grabber.go | ||||||
|
||||||
// Grabber is used to grab metrics from karmada components.
// The pod-name slices below are discovered once at construction time by
// NewMetricsGrabber from the pods in the karmada-system namespace.
type Grabber struct {
	hostKubeClient         clientset.Interface
	controllerManagerPods  []string
	schedulerPods          []string
	deschedulerPods        []string
	metricsAdapterPods     []string
	schedulerEstimatorPods []string
	webhookPods            []string
}
|
||||||
// NewMetricsGrabber creates a new metrics grabber | ||||||
func NewMetricsGrabber(ctx context.Context, c clientset.Interface) (*Grabber, error) { | ||||||
grabber := Grabber{hostKubeClient: c} | ||||||
regKarmadaControllerManager := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
regKarmadaScheduler := regexp.MustCompile(names.KarmadaSchedulerComponentName + "-.*") | ||||||
regKarmadaDescheduler := regexp.MustCompile(names.KarmadaDeschedulerComponentName + "-.*") | ||||||
regKarmadaMetricsAdapter := regexp.MustCompile(names.KarmadaMetricsAdapterComponentName + "-.*") | ||||||
regKarmadaSchedulerEstimator := regexp.MustCompile(names.KarmadaSchedulerEstimatorComponentName + "-" + ClusterNames()[0] + "-.*") | ||||||
regKarmadaWebhook := regexp.MustCompile(names.KarmadaWebhookComponentName + "-.*") | ||||||
|
||||||
podList, err := c.CoreV1().Pods(karmadaNamespace).List(ctx, metav1.ListOptions{}) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a question. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, just because our e2e environment is installed in |
||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
if len(podList.Items) < 1 { | ||||||
klog.Warningf("Can't find any pods in namespace %s to grab metrics from", karmadaNamespace) | ||||||
} | ||||||
for _, pod := range podList.Items { | ||||||
if regKarmadaControllerManager.MatchString(pod.Name) { | ||||||
grabber.controllerManagerPods = append(grabber.controllerManagerPods, pod.Name) | ||||||
continue | ||||||
} | ||||||
if regKarmadaDescheduler.MatchString(pod.Name) { | ||||||
grabber.deschedulerPods = append(grabber.deschedulerPods, pod.Name) | ||||||
continue | ||||||
} | ||||||
if regKarmadaMetricsAdapter.MatchString(pod.Name) { | ||||||
grabber.metricsAdapterPods = append(grabber.metricsAdapterPods, pod.Name) | ||||||
continue | ||||||
} | ||||||
if regKarmadaSchedulerEstimator.MatchString(pod.Name) { | ||||||
grabber.schedulerEstimatorPods = append(grabber.schedulerEstimatorPods, pod.Name) | ||||||
continue | ||||||
} | ||||||
if regKarmadaScheduler.MatchString(pod.Name) { | ||||||
grabber.schedulerPods = append(grabber.schedulerPods, pod.Name) | ||||||
continue | ||||||
} | ||||||
if regKarmadaWebhook.MatchString(pod.Name) { | ||||||
grabber.webhookPods = append(grabber.webhookPods, pod.Name) | ||||||
} | ||||||
} | ||||||
return &grabber, nil | ||||||
} | ||||||
|
||||||
// GrabMetricsFromComponent fetch metrics from the leader of a specified Karmada component | ||||||
func (g *Grabber) GrabMetricsFromComponent(ctx context.Context, component string) (map[string]testutil.Metrics, error) { | ||||||
pods, fromLeader := make([]string, 0), false | ||||||
switch component { | ||||||
case names.KarmadaControllerManagerComponentName: | ||||||
pods, fromLeader = g.controllerManagerPods, true | ||||||
case names.KarmadaSchedulerComponentName: | ||||||
pods, fromLeader = g.schedulerPods, true | ||||||
case names.KarmadaDeschedulerComponentName: | ||||||
pods, fromLeader = g.deschedulerPods, true | ||||||
case names.KarmadaMetricsAdapterComponentName: | ||||||
pods = g.metricsAdapterPods | ||||||
case names.KarmadaSchedulerEstimatorComponentName: | ||||||
pods = g.schedulerEstimatorPods | ||||||
case names.KarmadaWebhookComponentName: | ||||||
pods = g.webhookPods | ||||||
} | ||||||
return g.grabMetricsFromPod(ctx, component, pods, fromLeader) | ||||||
} | ||||||
|
||||||
// grabMetricsFromPod fetch metrics from the leader pod | ||||||
func (g *Grabber) grabMetricsFromPod(ctx context.Context, component string, pods []string, fromLeader bool) (map[string]testutil.Metrics, error) { | ||||||
var output string | ||||||
var lastMetricsFetchErr error | ||||||
|
||||||
result := make(map[string]testutil.Metrics) | ||||||
for _, podName := range pods { | ||||||
if metricsWaitErr := wait.PollUntilContextTimeout(ctx, time.Second, queryTimeout, true, func(ctx context.Context) (bool, error) { | ||||||
output, lastMetricsFetchErr = GetMetricsFromPod(ctx, g.hostKubeClient, podName, karmadaNamespace, metricsBindPort) | ||||||
return lastMetricsFetchErr == nil, nil | ||||||
}); metricsWaitErr != nil { | ||||||
klog.Errorf("error waiting for %s to expose metrics: %v; %v", podName, metricsWaitErr, lastMetricsFetchErr) | ||||||
continue | ||||||
} | ||||||
|
||||||
podMetrics := testutil.Metrics{} | ||||||
metricsParseErr := testutil.ParseMetrics(output, &podMetrics) | ||||||
if metricsParseErr != nil { | ||||||
klog.Errorf("failed to parse metrics for %s: %v", podName, metricsParseErr) | ||||||
continue | ||||||
} | ||||||
|
||||||
// judge which pod is the leader pod | ||||||
if fromLeader && !isLeaderPod(podMetrics[leaderPodMetric]) { | ||||||
klog.Infof("skip fetch %s since it is not the leader pod", podName) | ||||||
continue | ||||||
} | ||||||
|
||||||
result[podName] = podMetrics | ||||||
klog.Infof("successfully grabbed metrics of %s", podName) | ||||||
} | ||||||
|
||||||
if len(result) == 0 { | ||||||
return nil, fmt.Errorf("failed to fetch metrics from the pod of %s", component) | ||||||
} | ||||||
return result, nil | ||||||
} | ||||||
|
||||||
// GetMetricsFromPod retrieves metrics data. | ||||||
func GetMetricsFromPod(ctx context.Context, client clientset.Interface, podName string, namespace string, port int) (string, error) { | ||||||
rawOutput, err := client.CoreV1().RESTClient().Get(). | ||||||
Namespace(namespace). | ||||||
Resource("pods"). | ||||||
SubResource("proxy"). | ||||||
Name(fmt.Sprintf("%s:%d", podName, port)). | ||||||
Suffix("metrics"). | ||||||
Do(ctx).Raw() | ||||||
if err != nil { | ||||||
return "", err | ||||||
} | ||||||
return string(rawOutput), nil | ||||||
} | ||||||
|
||||||
func isLeaderPod(samples model.Samples) bool { | ||||||
for _, sample := range samples { | ||||||
if sample.Value > 0 { | ||||||
return true | ||||||
} | ||||||
} | ||||||
return false | ||||||
} | ||||||
|
||||||
// GetMetricByName returns the metric value with the given name. | ||||||
func GetMetricByName(samples model.Samples, name string) *model.Sample { | ||||||
for _, sample := range samples { | ||||||
if sample.Metric["name"] == model.LabelValue(name) { | ||||||
return sample | ||||||
} | ||||||
} | ||||||
return nil | ||||||
} | ||||||
Comment on lines
+182
to
+190
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unused function? |
||||||
|
||||||
// PrintMetricSample prints the metric sample | ||||||
func PrintMetricSample(podName string, sample model.Samples) { | ||||||
if sample.Len() == 0 { | ||||||
return | ||||||
} | ||||||
if podName != "" { | ||||||
klog.Infof("metrics from pod: %s", podName) | ||||||
} | ||||||
for _, s := range sample { | ||||||
klog.Infof("metric: %v, value: %v, timestamp: %v", s.Metric, s.Value, s.Timestamp) | ||||||
} | ||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* | ||
Copyright 2023 The Karmada Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package e2e | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/onsi/ginkgo/v2" | ||
"github.com/onsi/gomega" | ||
appsv1 "k8s.io/api/apps/v1" | ||
"k8s.io/apimachinery/pkg/util/rand" | ||
"k8s.io/klog/v2" | ||
|
||
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" | ||
"github.com/karmada-io/karmada/pkg/util/names" | ||
"github.com/karmada-io/karmada/test/e2e/framework" | ||
testhelper "github.com/karmada-io/karmada/test/helper" | ||
) | ||
|
||
// E2E spec that verifies each Karmada component actually exposes its expected
// metrics (workqueue metrics plus component-specific custom metrics).
var _ = ginkgo.Describe("metrics testing", func() {
	var grabber *framework.Grabber

	// componentMetrics maps each component to the metric names its /metrics
	// endpoint is expected to expose after some real scheduling work happened.
	var componentMetrics = map[string][]string{
		names.KarmadaControllerManagerComponentName: {
			"workqueue_queue_duration_seconds_sum",     // workqueue metrics
			"cluster_ready_state",                      // custom ClusterCollectors metrics
			"work_sync_workload_duration_seconds_sum",  // custom ResourceCollectors metrics
		},
		names.KarmadaSchedulerComponentName: {
			"workqueue_queue_duration_seconds_sum",      // workqueue metrics
			"karmada_scheduler_schedule_attempts_total", // scheduler custom metrics
		},
		names.KarmadaDeschedulerComponentName: {
			"workqueue_queue_duration_seconds_sum", // workqueue metrics
		},
		names.KarmadaMetricsAdapterComponentName: {
			"workqueue_queue_duration_seconds_sum", // workqueue metrics
		},
		names.KarmadaSchedulerEstimatorComponentName: {
			"karmada_scheduler_estimator_estimating_request_total", // scheduler estimator custom metrics
		},
		names.KarmadaWebhookComponentName: {
			"controller_runtime_webhook_requests_total", // controller runtime hook server metrics
		},
	}

	ginkgo.BeforeEach(func() {
		var err error
		grabber, err = framework.NewMetricsGrabber(context.TODO(), hostKubeClient)
		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
	})

	ginkgo.Context("metrics presence testing", func() {
		ginkgo.It("metrics presence testing for each component", func() {
			// Propagate a deployment first so the controllers, scheduler and
			// webhook all do real work, ensuring the metrics above are populated.
			ginkgo.By("do a simple scheduling to ensure above metrics exist", func() {
				name := deploymentNamePrefix + rand.String(RandomStrLength)
				deployment := testhelper.NewDeployment(testNamespace, name)
				policy := testhelper.NewPropagationPolicy(testNamespace, name, []policyv1alpha1.ResourceSelector{
					{
						APIVersion: deployment.APIVersion,
						Kind:       deployment.Kind,
						Name:       deployment.Name,
					},
				}, policyv1alpha1.Placement{
					ClusterAffinity: &policyv1alpha1.ClusterAffinity{
						ClusterNames: framework.ClusterNames(),
					},
				})
				framework.CreateDeployment(kubeClient, deployment)
				framework.CreatePropagationPolicy(karmadaClient, policy)
				// Clean up the test resources regardless of the spec outcome.
				ginkgo.DeferCleanup(func() {
					framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name)
					framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name)
				})
				framework.WaitDeploymentPresentOnClustersFitWith(framework.ClusterNames(), deployment.Namespace, deployment.Name, func(_ *appsv1.Deployment) bool { return true })
			})

			for component, metricNameList := range componentMetrics {
				ginkgo.By("judge metrics presence of component: "+component, func() {
					podsMetrics, err := grabber.GrabMetricsFromComponent(context.TODO(), component)
					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())

					for _, metricName := range metricNameList {
						// A metric counts as present when any pod of the
						// component reports at least one sample for it.
						metricExist := false
						for podName, metrics := range podsMetrics {
							// the output format of `metrics` is like:
							// {
							//   "workqueue_queue_duration_seconds_sum": [{
							//     "metric": {
							//       "__name__": "workqueue_queue_duration_seconds_sum",
							//       "controller": "work-status-controller",
							//       "name": "work-status-controller"
							//     },
							//     "value": [0, "0.12403110800000001"]
							//   }]
							// }
							framework.PrintMetricSample(podName, metrics[metricName])
							if metrics[metricName].Len() > 0 {
								metricExist = true
								break
							}
						}
						if !metricExist {
							klog.Errorf("metric %s not found in component %s", metricName, component)
							// Deliberately fails the spec: the metric is missing.
							gomega.Expect(metricExist).ShouldNot(gomega.BeFalse())
						}
					}
				})
			}
		})
	})
})
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks weird to parse kubeconfig like this, I think there are some mutual functions to handle cases like this.