Monitor your infrastructure with InfluxDB and Grafana on Kubernetes

Grafana in action — Learn how to set it up in your AWS cloud

Create a new EKS Kubernetes cluster

eksctl create cluster --name "StarKube" --version 1.18 --region=eu-central-1 --without-nodegroup
eksctl create nodegroup --cluster=StarKube --name=StarKube-default-ng --nodes-min 1 --nodes-max 4 --node-volume-size=20 --ssh-access --node-zones eu-central-1b  --asg-access --tags "Maintainer=tfoldi" --node-labels "ngrole=default" --managed
$ kubectl get nodes
AGE VERSION
ip-192-168-36-245.eu-central-1.compute.internal Ready <none> 16s v1.18.9-eks-d1db3c

Create a namespace for monitoring apps

kubectl create namespace monitoring
kubectl config set-context --current --namespace=monitoring

Install InfluxDB on Kubernetes

kubectl create secret generic influxdb-creds \
--from-literal=INFLUXDB_DB=monitoring \
--from-literal=INFLUXDB_USER=user \
--from-literal=INFLUXDB_USER_PASSWORD=<password> \
--from-literal=INFLUXDB_READ_USER=readonly \
--from-literal=INFLUXDB_USER_PASSWORD=<password> \
--from-literal=INFLUXDB_ADMIN_USER=root \
--from-literal=INFLUXDB_ADMIN_USER_PASSWORD=<password> \
--from-literal=INFLUXDB_HOST=influxdb \
--from-literal=INFLUXDB_HTTP_AUTH_ENABLED=true
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
namespace: monitoring
labels:
app: influxdb
name: influxdb-pvc
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
namespace: monitoring
labels:
app: influxdb
name: influxdb
spec:
replicas: 1
selector:
matchLabels:
app: influxdb
template:
metadata:
labels:
app: influxdb
spec:
containers:
- envFrom:
- secretRef:
name: influxdb-creds
image: docker.io/influxdb:1.8
name: influxdb
volumeMounts:
- mountPath: /var/lib/influxdb
name: var-lib-influxdb
volumes:
- name: var-lib-influxdb
persistentVolumeClaim:
claimName: influxdb-pvc
[tfoldi@kompi]% kubectl get pods -l app=influxdb
NAME READY STATUS RESTARTS AGE
influxdb-7f694df996-rtdcz 1/1 Running 0 16m
apiVersion: v1
kind: Service
metadata:
labels:
app: influxdb
name: influxdb
namespace: monitoring
spec:
ports:
- port: 8086
protocol: TCP
targetPort: 8086
selector:
app: influxdb
type: LoadBalancer
$ kubectl get service/influxdb
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
influxdb LoadBalancer 10.100.15.18 ade3d20c142394935a9dd33c336b3a0f-2034222208.eu-central-1.elb.amazonaws.com 8086:30651/TCP 18h
$ curl http://ade3d20c142394935a9dd33c336b3a0f-2034222208.eu-central-1.elb.amazonaws.com:8086/ping
Amazon Issued SSL Certs are great but require Route 53 hosted zones. Alternatively, you can import existing SSL certificates.
apiVersion: v1
kind: Service
metadata:
annotations:
# Note that the backend talks over HTTP.
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
# TODO: Fill in with the ARN of your certificate.
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:{region}:{user id}:certificate/{id}
# Only run SSL on the port named "https" below.
service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "https"
labels:
app: influxdb
name: influxdb
namespace: monitoring
spec:
ports:
- port: 8086
targetPort: 8086
name: http
- port: 443
name: https
targetPort: 8086
selector:
app: influxdb
type: LoadBalancer
[tfoldi@kompi]% kubectl get services/influxdb
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
influxdb LoadBalancer 10.100.15.18 ade3d20c142394935a9dd33c336b3a0f-2034222208.eu-central-1.elb.amazonaws.com 8086:30651/TCP,443:31445/TCP 18h

Installing Telegraf on Kubernetes

apiVersion: v1
kind: ConfigMap
metadata:
name: telegraf
namespace: monitoring
labels:
k8s-app: telegraf
data:
telegraf.conf: |+
[global_tags]
env = "EKS eu-central"
[agent]
hostname = "$HOSTNAME"
[[outputs.influxdb]]
urls = ["http://$INFLUXDB_HOST:8086/"] # required
database = "$INFLUXDB_DB" # required
timeout = "5s"
username = "$INFLUXDB_USER"
password = "$INFLUXDB_USER_PASSWORD"
[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = false
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs", "devfs"]
[[inputs.diskio]]
[[inputs.kernel]]
[[inputs.mem]]
[[inputs.processes]]
[[inputs.swap]]
[[inputs.system]]
[[inputs.docker]]
endpoint = "unix:///var/run/docker.sock"
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: telegraf
namespace: monitoring
labels:
k8s-app: telegraf
spec:
selector:
matchLabels:
name: telegraf
template:
metadata:
labels:
name: telegraf
spec:
containers:
- name: telegraf
image: docker.io/telegraf:1.5.2
env:
- name: HOSTNAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: "HOST_PROC"
value: "/rootfs/proc"
- name: "HOST_SYS"
value: "/rootfs/sys"
- name: INFLUXDB_USER
valueFrom:
secretKeyRef:
name: influxdb-creds
key: INFLUXDB_USER
- name: INFLUXDB_USER_PASSWORD
valueFrom:
secretKeyRef:
name: influxdb-creds
key: INFLUXDB_USER_PASSWORD
- name: INFLUXDB_HOST
valueFrom:
secretKeyRef:
name: influxdb-creds
key: INFLUXDB_HOST
- name: INFLUXDB_DB
valueFrom:
secretKeyRef:
name: influxdb-creds
key: INFLUXDB_DB
volumeMounts:
- name: sys
mountPath: /rootfs/sys
readOnly: true
- name: proc
mountPath: /rootfs/proc
readOnly: true
- name: docker-socket
mountPath: /var/run/docker.sock
- name: utmp
mountPath: /var/run/utmp
readOnly: true
- name: config
mountPath: /etc/telegraf
terminationGracePeriodSeconds: 30
volumes:
- name: sys
hostPath:
path: /sys
- name: docker-socket
hostPath:
path: /var/run/docker.sock
- name: proc
hostPath:
path: /proc
- name: utmp
hostPath:
path: /var/run/utmp
- name: config
configMap:
name: telegraf
$ kubectl get pods -l name=telegraf
NAME READY STATUS RESTARTS AGE
telegraf-mrgrg 1/1 Running 0 18h

Set up Grafana in Kubernetes

kubectl create secret generic grafana-creds \                                                                                                                                            
--from-literal=GF_SECURITY_ADMIN_USER=admin \
--from-literal=GF_SECURITY_ADMIN_PASSWORD=admin123
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: graf-data-dir-pvc
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
apiVersion: apps/v1
kind: Deployment
metadata:
namespace: monitoring
labels:
app: grafana
name: grafana
spec:
replicas: 1
selector:
matchLabels:
app: grafana
template:
metadata:
labels:
app: grafana
spec:
containers:
- envFrom:
- secretRef:
name: grafana-creds
image: docker.io/grafana/grafana:7.3.3
name: grafana
volumeMounts:
- name: data-dir
mountPath: /var/lib/grafana/
securityContext:
fsGroup: 472
volumes:
- name: data-dir
persistentVolumeClaim:
claimName: graf-data-dir-pvc
apiVersion: v1
kind: Service
metadata:
annotations:
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:eu-central-1:<account>:certificate/<certid> service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "https"
labels:
app: grafana
name: grafana
namespace: monitoring
spec:
ports:
- port: 443
name: https
targetPort: 3000
selector:
app: grafana
type: LoadBalancer
I am glad that you made it here, now let’s log on!
Home screen for our empty Grafana

Define database connection to InfluxDB

You know where should you click

Adding our first Grafana Dashboard

This is really cool

Next steps

Tamas is co-founder and CTO of data services firm Starschema where he leads the Starschema technical team to deliver results for the most innovative enterprises