Kubernetes HPA Autoscaling with Custom and External Metrics

Using GKE and Stackdriver Metrics

Background

Example of Scaling Based on External Metrics

# check if the metrics server is deployed (or heapster if before v1.11)
$ kubectl get deploy --all-namespaces
[...deleted...]
kube-system metrics-server-v0.2.1 1 1
kube-system heapster-v1.5.3 1 1
# make a request to the metrics API to show that it's available
$ kubectl get --raw "/apis/metrics.k8s.io/" | jq
{
"kind": "APIGroup",
"apiVersion": "v1",
"name": "metrics.k8s.io",
"versions": [
{
"groupVersion": "metrics.k8s.io/v1beta1",
"version": "v1beta1"
}
],
"preferredVersion": {
"groupVersion": "metrics.k8s.io/v1beta1",
"version": "v1beta1"
},
"serverAddressByClientCIDRs": null
}
$ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq
Error from server (NotFound): the server could not find the requested resource
$ kubectl get --raw "/apis/external.metrics.k8s.io/v1beta1"
Error from server (NotFound): the server could not find the requested resource
$ kubectl create clusterrolebinding cluster-admin-binding \
--clusterrole cluster-admin \
--user "$(gcloud config get-value account)"

clusterrolebinding.rbac.authorization.k8s.io/cluster-admin-binding created
$ kubectl create -f https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/deploy/production/adapter.yaml
# confirm it deployed happily
$ kubectl get po --all-namespaces
custom-metrics custom-metrics-stackdriver-adapter-c4d98dc54-xq8bj 1/1 Running 0 51s
# check to see if custom/external metrics api is up now
$ kubectl get --raw "/apis/external.metrics.k8s.io/v1beta1" | jq
{
"kind": "APIResourceList",
"apiVersion": "v1",
"groupVersion": "external.metrics.k8s.io/v1beta1",
"resources": []
}
$ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq
{
"kind": "APIResourceList",
"apiVersion": "v1",
"groupVersion": "custom.metrics.k8s.io/v1beta1",
"resources": [
{
"name": "*/agent.googleapis.com|agent|api_request_count",
"singularName": "",
"namespaced": true,
"kind": "MetricValueList",
"verbs": [
"get"
]
},
[...lots more metrics...]
{
"name": "*/vpn.googleapis.com|tunnel_established",
"singularName": "",
"namespaced": true,
"kind": "MetricValueList",
"verbs": [
"get"
]
}
]
}
# Sidecar container entry (belongs in a pod spec's `containers:` list).
# It runs prometheus-to-sd next to the nginx ingress controller and reads the
# controller's Prometheus endpoint on localhost.
- name: prometheus-to-sd
  image: gcr.io/google-containers/prometheus-to-sd:v0.2.1
  ports:
    - name: profiler
      containerPort: 6060
  command:
    - /monitor
    # The prefix and the source name appear as the first two components of the
    # exported metric name, e.g.
    # custom.googleapis.com|nginx-ingress-controller|nginx_connections_total
    - --stackdriver-prefix=custom.googleapis.com
    - --source=nginx-ingress-controller:http://localhost:10254/metrics
    # $(POD_NAME) / $(POD_NAMESPACE) are expanded by Kubernetes from the env
    # entries declared below.
    - --pod-id=$(POD_NAME)
    - --namespace-id=$(POD_NAMESPACE)
  env:
    # Downward API: inject this pod's own name and namespace.
    - name: POD_NAME
      valueFrom:
        fieldRef:
          fieldPath: metadata.name
    - name: POD_NAMESPACE
      valueFrom:
        fieldRef:
          fieldPath: metadata.namespace
$ kubectl get --raw "/apis/external.metrics.k8s.io/v1beta1/namespaces/default/custom.googleapis.com|nginx-ingress-controller|nginx_connections_total" | jq
{
"kind": "ExternalMetricValueList",
"apiVersion": "external.metrics.k8s.io/v1beta1",
"metadata": {
"selfLink": "/apis/external.metrics.k8s.io/v1beta1/namespaces/default/custom.googleapis.com%7Cnginx-ingress-controller%7Cnginx_connections_total"
},
"items": [
[...removed...]
{
"metricName": "custom.googleapis.com|nginx-ingress-controller|nginx_connections_total",
"metricLabels": {
"metric.labels.ingress_class": "nginx",
"metric.labels.namespace": "",
"metric.labels.state": "active",
"resource.labels.cluster_name": "example-custom-metrics",
"resource.labels.container_name": "",
"resource.labels.instance_id": "gke-example-custom-metri-default-pool-43d79fe3-08rp.c.cluster-health-test.internal",
"resource.labels.namespace_id": "default",
"resource.labels.pod_id": "nginx-nginx-ingress-controller-df8dd967f-fvcx9",
"resource.labels.project_id": "cluster-health-test",
"resource.labels.zone": "us-central1-a",
"resource.type": "gke_container"
},
"timestamp": "2018-07-22T21:22:48Z",
"value": "0"
},
[...removed...]
]
}
# hpa.yaml
# HorizontalPodAutoscaler that scales the example-nodejs-app Deployment
# between 1 and 5 replicas based on an external (Stackdriver) metric.
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: example-hpa-external-metrics
spec:
  minReplicas: 1
  maxReplicas: 5
  metrics:
    - type: External
      external:
        # Must match the metric name served by the external metrics API
        # (custom.googleapis.com|<source>|<metric>); the metric queried and
        # reported elsewhere in this walkthrough uses the source name
        # "nginx-ingress-controller".
        metricName: 'custom.googleapis.com|nginx-ingress-controller|nginx_connections_total'
        targetValue: 1
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: example-nodejs-app
$ kubectl describe hpa example-hpa-external-metrics
Name: example-hpa-external-metrics
Namespace: default
Reference: Deployment/example-nodejs-app
Metrics:
"custom.googleapis.com|nginx-ingress-controller|nginx_connections_total"
(target value): 1 / 1
Min replicas: 1
Max replicas: 5
Deployment pods: 1 current / 1 desired
Conditions:
Type Status Reason Message
---- ------ ------ -------
AbleToScale True ReadyForNewScale the last scale time was sufficiently old as to warrant a new scale
ScalingActive True ValidMetricFound the HPA was able to successfully calculate a replica count from external metric custom.googleapis.com|nginx-ingress-controller|nginx_connections_total(nil)

ScalingLimited False DesiredWithinRange the desired count is within the acceptable range
Events: <none>

Live simply. Program stuff.