Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce tests to check whether workqueue metrics exist #6003

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/installation-cli.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
hack/cli-testing-environment.sh

# run a single e2e
export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config
export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config
GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo
ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/
- name: export logs
Expand Down Expand Up @@ -87,7 +87,7 @@ jobs:
hack/cli-testing-init-with-config.sh

# run a single e2e
export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config
export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config
GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo
ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/
- name: export logs for config test
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ require (
github.com/onsi/gomega v1.34.1
github.com/opensearch-project/opensearch-go v1.1.0
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/common v0.55.0
github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.9.0
Expand Down Expand Up @@ -134,7 +135,6 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rivo/uniseg v0.4.2 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
Expand Down
10 changes: 9 additions & 1 deletion test/e2e/framework/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,15 @@ func WaitClusterFitWith(c client.Client, clusterName string, fit func(cluster *c

// LoadRESTClientConfig creates a rest.Config using the passed kubeconfig. If context is empty, current context in kubeconfig will be used.
func LoadRESTClientConfig(kubeconfig string, context string) (*rest.Config, error) {
loader := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig}
var loader *clientcmd.ClientConfigLoadingRules
if strings.Contains(kubeconfig, ":") {
// kubeconfig is a list of kubeconfig files in form of "file1:file2:file3"
loader = &clientcmd.ClientConfigLoadingRules{Precedence: strings.Split(kubeconfig, ":")}
} else {
// kubeconfig is a single file
loader = &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig}
}
Comment on lines +313 to +320
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks odd to parse the kubeconfig like this; I think there are existing common functions that handle cases like this.


loadedConfig, err := loader.Load()
if err != nil {
return nil, err
Expand Down
203 changes: 203 additions & 0 deletions test/e2e/framework/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
/*
Copyright 2024 The Karmada Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
"context"
"fmt"
"regexp"
"time"

"github.com/prometheus/common/model"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/component-base/metrics/testutil"
"k8s.io/klog/v2"

"github.com/karmada-io/karmada/pkg/util/names"
)

const (
	// karmadaNamespace is the namespace whose pods are listed and queried for metrics.
	karmadaNamespace = "karmada-system"
	// metricsBindPort is the pod port that metrics requests are proxied to.
	metricsBindPort = 8080
	// leaderPodMetric is the metric inspected to decide whether a pod is the leader.
	leaderPodMetric = "leader_election_master_status"
	// queryTimeout bounds how long a single pod is polled for its metrics.
	queryTimeout = 10 * time.Second
)

// The following code refers to https://github.com/kubernetes/kubernetes/blob/master/test/e2e/framework/metrics/metrics_grabber.go

// Grabber is used to grab metrics from karmada components.
// It carries a Kubernetes client together with the pod names recorded for
// each component.
type Grabber struct {
	// hostKubeClient is the client used to list pods and to proxy metrics requests to them.
	hostKubeClient clientset.Interface
	// controllerManagerPods holds the names of the controller manager pods.
	controllerManagerPods []string
	// schedulerPods holds the names of the scheduler pods.
	schedulerPods []string
	// deschedulerPods holds the names of the descheduler pods.
	deschedulerPods []string
	// metricsAdapterPods holds the names of the metrics adapter pods.
	metricsAdapterPods []string
	// schedulerEstimatorPods holds the names of the scheduler estimator pods.
	schedulerEstimatorPods []string
	// webhookPods holds the names of the webhook pods.
	webhookPods []string
}

// NewMetricsGrabber creates a new metrics grabber
func NewMetricsGrabber(ctx context.Context, c clientset.Interface) (*Grabber, error) {
grabber := Grabber{hostKubeClient: c}
regKarmadaControllerManager := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
regKarmadaControllerManager := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*")
regKarmadaControllerManagerPods := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*")

regKarmadaScheduler := regexp.MustCompile(names.KarmadaSchedulerComponentName + "-.*")
regKarmadaDescheduler := regexp.MustCompile(names.KarmadaDeschedulerComponentName + "-.*")
regKarmadaMetricsAdapter := regexp.MustCompile(names.KarmadaMetricsAdapterComponentName + "-.*")
regKarmadaSchedulerEstimator := regexp.MustCompile(names.KarmadaSchedulerEstimatorComponentName + "-" + ClusterNames()[0] + "-.*")
regKarmadaWebhook := regexp.MustCompile(names.KarmadaWebhookComponentName + "-.*")

podList, err := c.CoreV1().Pods(karmadaNamespace).List(ctx, metav1.ListOptions{})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question.
Is there any restriction that karmada must be installed in karmada-system?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, just because our e2e environment is installed in karmada-system

if err != nil {
return nil, err
}
if len(podList.Items) < 1 {
klog.Warningf("Can't find any pods in namespace %s to grab metrics from", karmadaNamespace)
}
for _, pod := range podList.Items {
if regKarmadaControllerManager.MatchString(pod.Name) {
grabber.controllerManagerPods = append(grabber.controllerManagerPods, pod.Name)
continue
}
if regKarmadaDescheduler.MatchString(pod.Name) {
grabber.deschedulerPods = append(grabber.deschedulerPods, pod.Name)
continue
}
if regKarmadaMetricsAdapter.MatchString(pod.Name) {
grabber.metricsAdapterPods = append(grabber.metricsAdapterPods, pod.Name)
continue
}
if regKarmadaSchedulerEstimator.MatchString(pod.Name) {
grabber.schedulerEstimatorPods = append(grabber.schedulerEstimatorPods, pod.Name)
continue
}
if regKarmadaScheduler.MatchString(pod.Name) {
grabber.schedulerPods = append(grabber.schedulerPods, pod.Name)
continue
}
if regKarmadaWebhook.MatchString(pod.Name) {
grabber.webhookPods = append(grabber.webhookPods, pod.Name)
}
}
return &grabber, nil
}

// GrabMetricsFromComponent fetches metrics from the pods of the given Karmada component.
//
// component must be one of the component names handled in the switch below.
// For the controller manager, scheduler and descheduler only pods that report
// themselves as leader contribute to the result; for the metrics adapter,
// scheduler estimator and webhook every pod that could be scraped is included.
//
// The returned map is keyed by pod name. An error is returned when component
// is not recognised or when no pod yielded any metrics.
func (g *Grabber) GrabMetricsFromComponent(ctx context.Context, component string) (map[string]testutil.Metrics, error) {
	var (
		pods       []string
		fromLeader bool
	)
	switch component {
	case names.KarmadaControllerManagerComponentName:
		pods, fromLeader = g.controllerManagerPods, true
	case names.KarmadaSchedulerComponentName:
		pods, fromLeader = g.schedulerPods, true
	case names.KarmadaDeschedulerComponentName:
		pods, fromLeader = g.deschedulerPods, true
	case names.KarmadaMetricsAdapterComponentName:
		pods = g.metricsAdapterPods
	case names.KarmadaSchedulerEstimatorComponentName:
		pods = g.schedulerEstimatorPods
	case names.KarmadaWebhookComponentName:
		pods = g.webhookPods
	default:
		// Fail fast with an explicit message rather than reporting a generic
		// fetch failure for a component that was never looked up.
		return nil, fmt.Errorf("unsupported component %q", component)
	}
	return g.grabMetricsFromPod(ctx, component, pods, fromLeader)
}

// grabMetricsFromPod fetches and parses the metrics of every pod in pods.
//
// Each pod is polled once per second for up to queryTimeout until its metrics
// can be fetched. Pods whose metrics cannot be fetched or parsed are logged
// and skipped. When fromLeader is true, pods whose leaderPodMetric samples do
// not mark them as leader are skipped as well.
//
// The returned map is keyed by pod name. If no pod contributed metrics an
// error naming component is returned; it wraps the most recent fetch or parse
// failure when there was one.
func (g *Grabber) grabMetricsFromPod(ctx context.Context, component string, pods []string, fromLeader bool) (map[string]testutil.Metrics, error) {
	// lastErr remembers the most recent per-pod failure so the final error
	// can explain why nothing was collected.
	var lastErr error

	result := make(map[string]testutil.Metrics)
	for _, podName := range pods {
		// Scoped per pod so a failure on one pod never leaks into the next.
		var (
			output   string
			fetchErr error
		)
		waitErr := wait.PollUntilContextTimeout(ctx, time.Second, queryTimeout, true, func(ctx context.Context) (bool, error) {
			output, fetchErr = GetMetricsFromPod(ctx, g.hostKubeClient, podName, karmadaNamespace, metricsBindPort)
			// Fetch errors are retried until the timeout instead of aborting the poll.
			return fetchErr == nil, nil
		})
		if waitErr != nil {
			klog.Errorf("error waiting for %s to expose metrics: %v; %v", podName, waitErr, fetchErr)
			// Prefer the concrete fetch error; fall back to the poll error
			// when no fetch attempt recorded one.
			cause := waitErr
			if fetchErr != nil {
				cause = fetchErr
			}
			lastErr = fmt.Errorf("pod %s: %w", podName, cause)
			continue
		}

		podMetrics := testutil.Metrics{}
		if parseErr := testutil.ParseMetrics(output, &podMetrics); parseErr != nil {
			klog.Errorf("failed to parse metrics for %s: %v", podName, parseErr)
			lastErr = fmt.Errorf("pod %s: %w", podName, parseErr)
			continue
		}

		// judge which pod is the leader pod
		if fromLeader && !isLeaderPod(podMetrics[leaderPodMetric]) {
			klog.Infof("skip fetch %s since it is not the leader pod", podName)
			continue
		}

		result[podName] = podMetrics
		klog.Infof("successfully grabbed metrics of %s", podName)
	}

	if len(result) == 0 {
		if lastErr != nil {
			return nil, fmt.Errorf("failed to fetch metrics from the pod of %s: %w", component, lastErr)
		}
		return nil, fmt.Errorf("failed to fetch metrics from the pod of %s", component)
	}
	return result, nil
}

// GetMetricsFromPod issues a GET against the "proxy" subresource of the pod
// addressed as "<podName>:<port>" in namespace, with the "metrics" suffix, and
// returns the raw response body as a string.
func GetMetricsFromPod(ctx context.Context, client clientset.Interface, podName string, namespace string, port int) (string, error) {
	target := fmt.Sprintf("%s:%d", podName, port)
	request := client.CoreV1().RESTClient().Get().
		Namespace(namespace).
		Resource("pods").
		SubResource("proxy").
		Name(target).
		Suffix("metrics")

	body, err := request.Do(ctx).Raw()
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// isLeaderPod reports whether at least one of the given samples has a value
// greater than zero.
func isLeaderPod(samples model.Samples) bool {
	for i := range samples {
		if samples[i].Value > 0 {
			return true
		}
	}
	return false
}

// GetMetricByName returns the first sample whose "name" label equals name,
// or nil when no sample carries that label value.
func GetMetricByName(samples model.Samples, name string) *model.Sample {
	want := model.LabelValue(name)
	for i := range samples {
		if candidate := samples[i]; candidate.Metric["name"] == want {
			return candidate
		}
	}
	return nil
}
Comment on lines +182 to +190
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused function?


// PrintMetricSample logs every sample in sample, preceded by the pod name
// when podName is non-empty. Nothing is logged when sample is empty.
func PrintMetricSample(podName string, sample model.Samples) {
	if len(sample) == 0 {
		return
	}

	if podName != "" {
		klog.Infof("metrics from pod: %s", podName)
	}

	for i := range sample {
		entry := sample[i]
		klog.Infof("metric: %v, value: %v, timestamp: %v", entry.Metric, entry.Value, entry.Timestamp)
	}
}
126 changes: 126 additions & 0 deletions test/e2e/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
Copyright 2023 The Karmada Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
"context"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/klog/v2"

policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
"github.com/karmada-io/karmada/pkg/util/names"
"github.com/karmada-io/karmada/test/e2e/framework"
testhelper "github.com/karmada-io/karmada/test/helper"
)

// The "metrics testing" suite propagates a simple deployment and then checks
// that each Karmada component exposes the metrics listed for it.
var _ = ginkgo.Describe("metrics testing", func() {
	var grabber *framework.Grabber

	// componentMetrics lists, per component, the metric names that must be
	// present in the metrics grabbed from at least one of its pods.
	var componentMetrics = map[string][]string{
		names.KarmadaControllerManagerComponentName: {
			"workqueue_queue_duration_seconds_sum",    // workqueue metrics
			"cluster_ready_state",                     // custom ClusterCollectors metrics
			"work_sync_workload_duration_seconds_sum", // custom ResourceCollectors metrics
		},
		names.KarmadaSchedulerComponentName: {
			"workqueue_queue_duration_seconds_sum",      // workqueue metrics
			"karmada_scheduler_schedule_attempts_total", // scheduler custom metrics
		},
		names.KarmadaDeschedulerComponentName: {
			"workqueue_queue_duration_seconds_sum", // workqueue metrics
		},
		names.KarmadaMetricsAdapterComponentName: {
			"workqueue_queue_duration_seconds_sum", // workqueue metrics
		},
		names.KarmadaSchedulerEstimatorComponentName: {
			"karmada_scheduler_estimator_estimating_request_total", // scheduler estimator custom metrics
		},
		names.KarmadaWebhookComponentName: {
			"controller_runtime_webhook_requests_total", // controller runtime hook server metrics
		},
	}

	ginkgo.BeforeEach(func() {
		var err error
		grabber, err = framework.NewMetricsGrabber(context.TODO(), hostKubeClient)
		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
	})

	ginkgo.Context("metrics presence testing", func() {
		ginkgo.It("metrics presence testing for each component", func() {
			ginkgo.By("do a simple scheduling to ensure above metrics exist", func() {
				name := deploymentNamePrefix + rand.String(RandomStrLength)
				deployment := testhelper.NewDeployment(testNamespace, name)
				policy := testhelper.NewPropagationPolicy(testNamespace, name, []policyv1alpha1.ResourceSelector{
					{
						APIVersion: deployment.APIVersion,
						Kind:       deployment.Kind,
						Name:       deployment.Name,
					},
				}, policyv1alpha1.Placement{
					ClusterAffinity: &policyv1alpha1.ClusterAffinity{
						ClusterNames: framework.ClusterNames(),
					},
				})
				framework.CreateDeployment(kubeClient, deployment)
				framework.CreatePropagationPolicy(karmadaClient, policy)
				ginkgo.DeferCleanup(func() {
					framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name)
					framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name)
				})
				framework.WaitDeploymentPresentOnClustersFitWith(framework.ClusterNames(), deployment.Namespace, deployment.Name, func(_ *appsv1.Deployment) bool { return true })
			})

			for component, metricNameList := range componentMetrics {
				ginkgo.By("judge metrics presence of component: "+component, func() {
					podsMetrics, err := grabber.GrabMetricsFromComponent(context.TODO(), component)
					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())

					for _, metricName := range metricNameList {
						// A metric counts as present as soon as one pod of the
						// component reports at least one sample for it.
						metricExist := false
						for podName, metrics := range podsMetrics {
							// the output format of `metrics` is like:
							// {
							//   "workqueue_queue_duration_seconds_sum": [{
							//     "metric": {
							//       "__name__": "workqueue_queue_duration_seconds_sum",
							//       "controller": "work-status-controller",
							//       "name": "work-status-controller"
							//     },
							//     "value": [0, "0.12403110800000001"]
							//   }]
							// }
							framework.PrintMetricSample(podName, metrics[metricName])
							if metrics[metricName].Len() > 0 {
								metricExist = true
								break
							}
						}
						if !metricExist {
							klog.Errorf("metric %s not found in component %s", metricName, component)
						}
						// Assert unconditionally and attach the metric and
						// component so a failure is self-explanatory.
						gomega.Expect(metricExist).Should(gomega.BeTrue(), "metric %s not found in component %s", metricName, component)
					}
				})
			}
		})
	})
})
Loading