-
Notifications
You must be signed in to change notification settings - Fork 914
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
introduce tests to check whether workqueue metrics exist
Signed-off-by: chaosi-zju <[email protected]>
- Loading branch information
1 parent
43f2953
commit b17e3dd
Showing
6 changed files
with
352 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
/* | ||
Copyright 2024 The Karmada Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package framework | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"regexp" | ||
"time" | ||
|
||
"github.com/prometheus/common/model" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
clientset "k8s.io/client-go/kubernetes" | ||
"k8s.io/component-base/metrics/testutil" | ||
"k8s.io/klog/v2" | ||
|
||
"github.com/karmada-io/karmada/pkg/util/names" | ||
) | ||
|
||
// Settings shared by the metrics grabber when locating Karmada component pods
// and scraping their metrics endpoints.
const (
	// karmadaNamespace is the namespace whose pods are listed and scraped.
	karmadaNamespace = "karmada-system"
	// metricsBindPort is the pod port on which the metrics endpoint is requested.
	metricsBindPort = 8080
	// leaderPodMetric is the metric inspected to decide whether a pod is the leader.
	leaderPodMetric = "leader_election_master_status"
	// queryTimeout bounds how long a single pod is polled for its metrics.
	queryTimeout = 10 * time.Second
)
|
||
// The implementation below is modeled on https://github.com/kubernetes/kubernetes/blob/master/test/e2e/framework/metrics/metrics_grabber.go
|
||
// Grabber is used to grab metrics from karmada components | ||
type Grabber struct { | ||
hostKubeClient clientset.Interface | ||
controllerManagerPods []string | ||
schedulerPods []string | ||
deschedulerPods []string | ||
metricsAdapterPods []string | ||
schedulerEstimatorPods []string | ||
webhookPods []string | ||
} | ||
|
||
// NewMetricsGrabber creates a new metrics grabber | ||
func NewMetricsGrabber(ctx context.Context, c clientset.Interface) (*Grabber, error) { | ||
grabber := Grabber{hostKubeClient: c} | ||
regKarmadaControllerManager := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*") | ||
regKarmadaScheduler := regexp.MustCompile(names.KarmadaSchedulerComponentName + "-.*") | ||
regKarmadaDescheduler := regexp.MustCompile(names.KarmadaDeschedulerComponentName + "-.*") | ||
regKarmadaMetricsAdapter := regexp.MustCompile(names.KarmadaMetricsAdapterComponentName + "-.*") | ||
regKarmadaSchedulerEstimator := regexp.MustCompile(names.KarmadaSchedulerEstimatorComponentName + "-" + ClusterNames()[0] + "-.*") | ||
regKarmadaWebhook := regexp.MustCompile(names.KarmadaWebhookComponentName + "-.*") | ||
|
||
podList, err := c.CoreV1().Pods(karmadaNamespace).List(ctx, metav1.ListOptions{}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if len(podList.Items) < 1 { | ||
klog.Warningf("Can't find any pods in namespace %s to grab metrics from", karmadaNamespace) | ||
} | ||
for _, pod := range podList.Items { | ||
if regKarmadaControllerManager.MatchString(pod.Name) { | ||
grabber.controllerManagerPods = append(grabber.controllerManagerPods, pod.Name) | ||
continue | ||
} | ||
if regKarmadaDescheduler.MatchString(pod.Name) { | ||
grabber.deschedulerPods = append(grabber.deschedulerPods, pod.Name) | ||
continue | ||
} | ||
if regKarmadaMetricsAdapter.MatchString(pod.Name) { | ||
grabber.metricsAdapterPods = append(grabber.metricsAdapterPods, pod.Name) | ||
continue | ||
} | ||
if regKarmadaSchedulerEstimator.MatchString(pod.Name) { | ||
grabber.schedulerEstimatorPods = append(grabber.schedulerEstimatorPods, pod.Name) | ||
continue | ||
} | ||
if regKarmadaScheduler.MatchString(pod.Name) { | ||
grabber.schedulerPods = append(grabber.schedulerPods, pod.Name) | ||
continue | ||
} | ||
if regKarmadaWebhook.MatchString(pod.Name) { | ||
grabber.webhookPods = append(grabber.webhookPods, pod.Name) | ||
} | ||
} | ||
return &grabber, nil | ||
} | ||
|
||
// GrabMetricsFromComponent fetch metrics from the leader of a specified Karmada component | ||
func (g *Grabber) GrabMetricsFromComponent(ctx context.Context, component string) (map[string]testutil.Metrics, error) { | ||
pods, fromLeader := make([]string, 0), false | ||
switch component { | ||
case names.KarmadaControllerManagerComponentName: | ||
pods, fromLeader = g.controllerManagerPods, true | ||
case names.KarmadaSchedulerComponentName: | ||
pods, fromLeader = g.schedulerPods, true | ||
case names.KarmadaDeschedulerComponentName: | ||
pods, fromLeader = g.deschedulerPods, true | ||
case names.KarmadaMetricsAdapterComponentName: | ||
pods = g.metricsAdapterPods | ||
case names.KarmadaSchedulerEstimatorComponentName: | ||
pods = g.schedulerEstimatorPods | ||
case names.KarmadaWebhookComponentName: | ||
pods = g.webhookPods | ||
} | ||
return g.grabMetricsFromPod(ctx, component, pods, fromLeader) | ||
} | ||
|
||
// grabMetricsFromPod fetch metrics from the leader pod | ||
func (g *Grabber) grabMetricsFromPod(ctx context.Context, component string, pods []string, fromLeader bool) (map[string]testutil.Metrics, error) { | ||
var output string | ||
var lastMetricsFetchErr error | ||
|
||
result := make(map[string]testutil.Metrics) | ||
for _, podName := range pods { | ||
if metricsWaitErr := wait.PollUntilContextTimeout(ctx, time.Second, queryTimeout, true, func(ctx context.Context) (bool, error) { | ||
output, lastMetricsFetchErr = GetMetricsFromPod(ctx, g.hostKubeClient, podName, karmadaNamespace, metricsBindPort) | ||
return lastMetricsFetchErr == nil, nil | ||
}); metricsWaitErr != nil { | ||
klog.Errorf("error waiting for %s to expose metrics: %v; %v", podName, metricsWaitErr, lastMetricsFetchErr) | ||
continue | ||
} | ||
|
||
podMetrics := testutil.Metrics{} | ||
metricsParseErr := testutil.ParseMetrics(output, &podMetrics) | ||
if metricsParseErr != nil { | ||
klog.Errorf("failed to parse metrics for %s: %v", podName, metricsParseErr) | ||
continue | ||
} | ||
|
||
// judge which pod is the leader pod | ||
if fromLeader && !isLeaderPod(podMetrics[leaderPodMetric]) { | ||
klog.Infof("skip fetch %s since it is not the leader pod", podName) | ||
continue | ||
} | ||
|
||
result[podName] = podMetrics | ||
klog.Infof("successfully grabbed metrics of %s", podName) | ||
} | ||
|
||
if len(result) == 0 { | ||
return nil, fmt.Errorf("failed to fetch metrics from the pod of %s", component) | ||
} | ||
return result, nil | ||
} | ||
|
||
// GetMetricsFromPod retrieves metrics data. | ||
func GetMetricsFromPod(ctx context.Context, client clientset.Interface, podName string, namespace string, port int) (string, error) { | ||
rawOutput, err := client.CoreV1().RESTClient().Get(). | ||
Namespace(namespace). | ||
Resource("pods"). | ||
SubResource("proxy"). | ||
Name(fmt.Sprintf("%s:%d", podName, port)). | ||
Suffix("metrics"). | ||
Do(ctx).Raw() | ||
if err != nil { | ||
return "", err | ||
} | ||
return string(rawOutput), nil | ||
} | ||
|
||
func isLeaderPod(samples model.Samples) bool { | ||
for _, sample := range samples { | ||
if sample.Value > 0 { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
// GetMetricByName returns the metric value with the given name. | ||
func GetMetricByName(samples model.Samples, name string) *model.Sample { | ||
for _, sample := range samples { | ||
if sample.Metric["name"] == model.LabelValue(name) { | ||
return sample | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// PrintMetricSample prints the metric sample | ||
func PrintMetricSample(podName string, sample model.Samples) { | ||
if sample.Len() == 0 { | ||
return | ||
} | ||
if podName != "" { | ||
klog.Infof("metrics from pod: %s", podName) | ||
} | ||
for _, s := range sample { | ||
klog.Infof("metric: %v, value: %v, timestamp: %v", s.Metric, s.Value, s.Timestamp) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* | ||
Copyright 2023 The Karmada Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package e2e | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/onsi/ginkgo/v2" | ||
"github.com/onsi/gomega" | ||
appsv1 "k8s.io/api/apps/v1" | ||
"k8s.io/apimachinery/pkg/util/rand" | ||
"k8s.io/klog/v2" | ||
|
||
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" | ||
"github.com/karmada-io/karmada/pkg/util/names" | ||
"github.com/karmada-io/karmada/test/e2e/framework" | ||
testhelper "github.com/karmada-io/karmada/test/helper" | ||
) | ||
|
||
var _ = ginkgo.Describe("metrics testing", func() { | ||
var grabber *framework.Grabber | ||
|
||
var componentMetrics = map[string][]string{ | ||
names.KarmadaControllerManagerComponentName: { | ||
"workqueue_queue_duration_seconds_sum", // workqueue metrics | ||
"cluster_ready_state", // custom ClusterCollectors metrics | ||
"work_sync_workload_duration_seconds_sum", // custom ResourceCollectors metrics | ||
}, | ||
names.KarmadaSchedulerComponentName: { | ||
"workqueue_queue_duration_seconds_sum", // workqueue metrics | ||
"karmada_scheduler_schedule_attempts_total", // scheduler custom metrics | ||
}, | ||
names.KarmadaDeschedulerComponentName: { | ||
"workqueue_queue_duration_seconds_sum", // workqueue metrics | ||
}, | ||
names.KarmadaMetricsAdapterComponentName: { | ||
"workqueue_queue_duration_seconds_sum", // workqueue metrics | ||
}, | ||
names.KarmadaSchedulerEstimatorComponentName: { | ||
"karmada_scheduler_estimator_estimating_request_total", // scheduler estimator custom metrics | ||
}, | ||
names.KarmadaWebhookComponentName: { | ||
"controller_runtime_webhook_requests_total", // controller runtime hook server metrics | ||
}, | ||
} | ||
|
||
ginkgo.BeforeEach(func() { | ||
var err error | ||
grabber, err = framework.NewMetricsGrabber(context.TODO(), hostKubeClient) | ||
gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) | ||
}) | ||
|
||
ginkgo.Context("metrics presence testing", func() { | ||
ginkgo.It("metrics presence testing for each component", func() { | ||
ginkgo.By("do a simple scheduling to ensure above metrics exist", func() { | ||
name := deploymentNamePrefix + rand.String(RandomStrLength) | ||
deployment := testhelper.NewDeployment(testNamespace, name) | ||
policy := testhelper.NewPropagationPolicy(testNamespace, name, []policyv1alpha1.ResourceSelector{ | ||
{ | ||
APIVersion: deployment.APIVersion, | ||
Kind: deployment.Kind, | ||
Name: deployment.Name, | ||
}, | ||
}, policyv1alpha1.Placement{ | ||
ClusterAffinity: &policyv1alpha1.ClusterAffinity{ | ||
ClusterNames: framework.ClusterNames(), | ||
}, | ||
}) | ||
framework.CreateDeployment(kubeClient, deployment) | ||
framework.CreatePropagationPolicy(karmadaClient, policy) | ||
ginkgo.DeferCleanup(func() { | ||
framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name) | ||
framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name) | ||
}) | ||
framework.WaitDeploymentPresentOnClustersFitWith(framework.ClusterNames(), deployment.Namespace, deployment.Name, func(_ *appsv1.Deployment) bool { return true }) | ||
}) | ||
|
||
for component, metricNameList := range componentMetrics { | ||
ginkgo.By("judge metrics presence of component: "+component, func() { | ||
podsMetrics, err := grabber.GrabMetricsFromComponent(context.TODO(), component) | ||
gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) | ||
|
||
for _, metricName := range metricNameList { | ||
metricExist := false | ||
for podName, metrics := range podsMetrics { | ||
// the output format of `metrics` is like: | ||
// { | ||
// "workqueue_queue_duration_seconds_sum": [{ | ||
// "metric": { | ||
// "__name__": "workqueue_queue_duration_seconds_sum", | ||
// "controller": "work-status-controller", | ||
// "name": "work-status-controller" | ||
// }, | ||
// "value": [0, "0.12403110800000001"] | ||
// }] | ||
// } | ||
framework.PrintMetricSample(podName, metrics[metricName]) | ||
if metrics[metricName].Len() > 0 { | ||
metricExist = true | ||
break | ||
} | ||
} | ||
if !metricExist { | ||
klog.Errorf("metric %s not found in component %s", metricName, component) | ||
gomega.Expect(metricExist).ShouldNot(gomega.BeFalse()) | ||
} | ||
} | ||
}) | ||
} | ||
}) | ||
}) | ||
}) |
Oops, something went wrong.