I followed your advice and moved to only using config flags, in fact I set it up in all the different ways I can think of, and the result seems to always be the same.
example: using CLI flags
docker-compose.yml
(using CLI flags)
---
version: "3"
volumes:
# persistent volumes for storage backends which will remain across container lifetimes
jaeger-data: {} # storage of traces on-disk from jaeger
loki-data: {} # storage of log data on-disk from loki
prometheus-data: {} # storage of metrics data on-disk from prometheus
services:
# utility for exposing docker containers over local DNS
traefik:
image: traefik:v2.10
hostname: traefik
command:
- "--api.insecure"
- "--entrypoints.tcp80.address=:80/tcp"
- "--entrypoints.otelgrpc.address=:4317/tcp"
- "--entrypoints.otelhttp.address=:4318/tcp"
- "--providers.docker"
- "--providers.docker.exposedByDefault=false"
ports:
- "80:80"
- "8080:8080"
- "4317:4317"
- "4318:4318"
volumes:
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
restart: unless-stopped
healthcheck:
test: [ "CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:8080/" ]
interval: 1s
timeout: 2s
retries: 15
jaeger-volume:
image: alpine
command:
- chmod
- "0777"
- /jaeger
volumes:
- type: volume
source: jaeger-data
target: /jaeger
jaeger: # storage backend for trace data
image: jaegertracing/all-in-one:1
hostname: jaeger
environment:
# enable opentelemetry support
COLLECTOR_OTLP_ENABLED: true
# store traces on disk as opposed to memory, allowing persistence between container restarts
SPAN_STORAGE_TYPE: badger
BADGER_EPHEMERAL: false
BADGER_DIRECTORY_KEY: /jaeger/keys
BADGER_DIRECTORY_VALUE: /jaeger/values
volumes:
- type: volume
source: jaeger-data
target: /jaeger
ports:
- "4317" # grpc otlp
- "4318" # http otlp
- "16686" # HTTP frontend
labels:
- "traefik.enable=true"
- "traefik.http.routers.jaeger-router.rule=Host(`jaeger.localhost`)"
- "traefix.http.routers.jaeger-router.entrypoints=tcp80"
- "traefik.http.routers.jaeger-router.service=jaeger-service"
- "traefik.http.services.jaeger-service.loadbalancer.server.port=3000"
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:14269/"]
interval: 2s
timeout: 2s
retries: 15
depends_on:
jaeger-volume:
condition: service_completed_successfully
loki: # storage backend for log data
image: grafana/loki:2.9.2
hostname: loki
volumes:
- type: bind
source: ./local/etc/loki
target: /etc/loki
read_only: true
- type: volume
source: loki-data
target: /loki
ports:
- "3100"
healthcheck:
test: [ "CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:3100/ready" ]
interval: 2s
timeout: 2s
retries: 15
prometheus: # storage backend for metric data
image: prom/prometheus:v2.47.2
hostname: prometheus
command:
# enable pushing data into prometheus
- "--web.enable-remote-write-receiver"
# since we override the command, we need to re-specify the config file location
- "--config.file=/etc/prometheus/prometheus.yml"
volumes:
- type: bind
source: ./local/etc/prometheus
target: /etc/prometheus
read_only: true
- type: volume
source: prometheus-data
target: /prometheus
ports:
- "9090"
labels:
- traefik.enable=true
- traefik.http.routers.prom-router.rule=Host(`prometheus.localhost`)
- traefix.http.routers.prom-router.entrypoints=tcp80
- traefik.http.routers.prom-router.service=prom-service
- traefik.http.services.prom-service.loadbalancer.server.port=9090
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:9090/-/healthy"]
interval: 2s
timeout: 2s
retries: 15
grafana:
# log display
image: grafana/grafana:10.2.0
hostname: grafana
environment:
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_DISABLE_LOGIN_FORM: "true"
GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
volumes:
- type: bind
source: ./local/etc/grafana/provisioning/datasources
target: /etc/grafana/provisioning/datasources
read_only: true
ports:
- "3000"
labels:
- traefik.enable=true
- traefik.http.routers.grafana-router.rule=Host(`telemetry.localhost`)
- traefix.http.routers.grafana-router.entrypoints=tcp80
- traefik.http.routers.grafana-router.service=grafana-service
- traefik.http.services.grafana-service.loadbalancer.server.port=3000
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:3000/api/health"]
interval: 2s
timeout: 2s
retries: 15
depends_on:
jaeger:
condition: service_healthy
loki:
condition: service_healthy
prometheus:
condition: service_healthy
collector:
image: grafana/agent:latest
command:
- "run"
- "--server.http.listen-addr=0.0.0.0:12345"
- "/etc/grafana/agent/config.river"
environment:
AGENT_MODE: flow
CONFIG_FILE_PATH: /etc/grafana/agent/config.river
volumes:
- type: bind
source: ./local/etc/grafana/agent
target: /etc/grafana/agent
read_only: true
- type: bind
source: ./local/lib/grafana/agent/healthcheck.sh
target: /lib/grafana-healthcheck.sh
read_only: true
ports:
- "4317" # grpc otlp
- "4318" # http otlp
- "12345" # web console
labels:
- traefik.enable=true
# :4317 gRPC (TCP forwarding)
- traefik.tcp.routers.otlp-grpc-router.rule=HostSNI(`*`)
- traefik.tcp.routers.otlp-grpc-router.priority=1000
- traefik.tcp.routers.otlp-grpc-router.entrypoints=otelgrpc
- traefik.tcp.routers.otlp-grpc-router.service=otlp-grpc-service
- traefik.tcp.services.otlp-grpc-service.loadbalancer.server.port=4317
# :4318 HTTP (TCP forwarding)
- traefik.tcp.routers.otlp-http-router.rule=HostSNI(`*`)
- traefik.tcp.routers.otlp-http-router.priority=1000
- traefik.tcp.routers.otlp-http-router.entrypoints=otelhttp
- traefik.tcp.routers.otlp-http-router.service=otlp-http-service
- traefik.tcp.services.otlp-http-service.loadbalancer.server.port=4318
# Dashboard (HTTP forwarding)
- traefik.http.routers.collector-router.rule=Host(`collector.localhost`)
- traefix.http.routers.collector-router.entrypoints=tcp80
- traefik.http.routers.collector-router.service=collector-service
- traefik.http.services.collector-service.loadbalancer.server.port=12345
healthcheck:
test: [ "CMD", "/lib/grafana-healthcheck.sh" ]
interval: 2s
timeout: 30s
retries: 15
depends_on:
jaeger:
condition: service_healthy
loki:
condition: service_healthy
prometheus:
condition: service_healthy
Just as before, it boots as expected, all services including traefik
show healthy, but I still see the TCP listeners attached to services they have nothing to do with:
example: using environment variables
docker-compose.yml
(using env vars)
---
version: "3"
volumes:
# persistent volumes for storage backends which will remain across container lifetimes
jaeger-data: {} # storage of traces on-disk from jaeger
loki-data: {} # storage of log data on-disk from loki
prometheus-data: {} # storage of metrics data on-disk from prometheus
services:
# utility for exposing docker containers over local DNS
traefik:
image: traefik:v2.10
hostname: traefik
environment:
TRAEFIK_API_INSECURE: true
TRAEFIK_ENTRYPOINTS_tcp80: true
TRAEFIK_ENTRYPOINTS_tcp80_ADDRESS: ':80/tcp'
TRAEFIK_ENTRYPOINTS_otelgrpc: true
TRAEFIK_ENTRYPOINTS_otelgrpc_ADDRESS: ':4317/tcp'
TRAEFIK_ENTRYPOINTS_otelhttp: true
TRAEFIK_ENTRYPOINTS_otelhttp_ADDRESS: ':4318/tcp'
TRAEFIK_PROVIDERS_DOCKER: true
TRAEFIK_PROVIDERS_DOCKER_EXPOSEDBYDEFAULT: false
ports:
- "80:80"
- "8080:8080"
- "4317:4317"
- "4318:4318"
volumes:
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
restart: unless-stopped
healthcheck:
test: [ "CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:8080/" ]
interval: 1s
timeout: 2s
retries: 15
jaeger-volume:
image: alpine
command:
- chmod
- "0777"
- /jaeger
volumes:
- type: volume
source: jaeger-data
target: /jaeger
jaeger: # storage backend for trace data
image: jaegertracing/all-in-one:1
hostname: jaeger
environment:
# enable opentelemetry support
COLLECTOR_OTLP_ENABLED: true
# store traces on disk as opposed to memory, allowing persistence between container restarts
SPAN_STORAGE_TYPE: badger
BADGER_EPHEMERAL: false
BADGER_DIRECTORY_KEY: /jaeger/keys
BADGER_DIRECTORY_VALUE: /jaeger/values
volumes:
- type: volume
source: jaeger-data
target: /jaeger
ports:
- "4317" # grpc otlp
- "4318" # http otlp
- "16686" # HTTP frontend
labels:
- "traefik.enable=true"
- "traefik.http.routers.jaeger-router.rule=Host(`jaeger.localhost`)"
- "traefix.http.routers.jaeger-router.entrypoints=tcp80"
- "traefik.http.routers.jaeger-router.service=jaeger-service"
- "traefik.http.services.jaeger-service.loadbalancer.server.port=3000"
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:14269/"]
interval: 2s
timeout: 2s
retries: 15
depends_on:
jaeger-volume:
condition: service_completed_successfully
loki: # storage backend for log data
image: grafana/loki:2.9.2
hostname: loki
volumes:
- type: bind
source: ./local/etc/loki
target: /etc/loki
read_only: true
- type: volume
source: loki-data
target: /loki
ports:
- "3100"
healthcheck:
test: [ "CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:3100/ready" ]
interval: 2s
timeout: 2s
retries: 15
prometheus: # storage backend for metric data
image: prom/prometheus:v2.47.2
hostname: prometheus
command:
# enable pushing data into prometheus
- "--web.enable-remote-write-receiver"
# since we override the command, we need to re-specify the config file location
- "--config.file=/etc/prometheus/prometheus.yml"
volumes:
- type: bind
source: ./local/etc/prometheus
target: /etc/prometheus
read_only: true
- type: volume
source: prometheus-data
target: /prometheus
ports:
- "9090"
labels:
- traefik.enable=true
- traefik.http.routers.prom-router.rule=Host(`prometheus.localhost`)
- traefix.http.routers.prom-router.entrypoints=tcp80
- traefik.http.routers.prom-router.service=prom-service
- traefik.http.services.prom-service.loadbalancer.server.port=9090
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:9090/-/healthy"]
interval: 2s
timeout: 2s
retries: 15
grafana:
# log display
image: grafana/grafana:10.2.0
hostname: grafana
environment:
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_DISABLE_LOGIN_FORM: "true"
GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
volumes:
- type: bind
source: ./local/etc/grafana/provisioning/datasources
target: /etc/grafana/provisioning/datasources
read_only: true
ports:
- "3000"
labels:
- traefik.enable=true
- traefik.http.routers.grafana-router.rule=Host(`telemetry.localhost`)
- traefix.http.routers.grafana-router.entrypoints=tcp80
- traefik.http.routers.grafana-router.service=grafana-service
- traefik.http.services.grafana-service.loadbalancer.server.port=3000
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:3000/api/health"]
interval: 2s
timeout: 2s
retries: 15
depends_on:
jaeger:
condition: service_healthy
loki:
condition: service_healthy
prometheus:
condition: service_healthy
collector:
image: grafana/agent:latest
command:
- "run"
- "--server.http.listen-addr=0.0.0.0:12345"
- "/etc/grafana/agent/config.river"
environment:
AGENT_MODE: flow
CONFIG_FILE_PATH: /etc/grafana/agent/config.river
volumes:
- type: bind
source: ./local/etc/grafana/agent
target: /etc/grafana/agent
read_only: true
- type: bind
source: ./local/lib/grafana/agent/healthcheck.sh
target: /lib/grafana-healthcheck.sh
read_only: true
ports:
- "4317" # grpc otlp
- "4318" # http otlp
- "12345" # web console
labels:
- traefik.enable=true
# :4317 gRPC (TCP forwarding)
- traefik.tcp.routers.otlp-grpc-router.rule=HostSNI(`*`)
- traefik.tcp.routers.otlp-grpc-router.priority=1000
- traefik.tcp.routers.otlp-grpc-router.entrypoints=otelgrpc
- traefik.tcp.routers.otlp-grpc-router.service=otlp-grpc-service
- traefik.tcp.services.otlp-grpc-service.loadbalancer.server.port=4317
# :4318 HTTP (TCP forwarding)
- traefik.tcp.routers.otlp-http-router.rule=HostSNI(`*`)
- traefik.tcp.routers.otlp-http-router.priority=1000
- traefik.tcp.routers.otlp-http-router.entrypoints=otelhttp
- traefik.tcp.routers.otlp-http-router.service=otlp-http-service
- traefik.tcp.services.otlp-http-service.loadbalancer.server.port=4318
# Dashboard (HTTP forwarding)
- traefik.http.routers.collector-router.rule=Host(`collector.localhost`)
- traefix.http.routers.collector-router.entrypoints=tcp80
- traefik.http.routers.collector-router.service=collector-service
- traefik.http.services.collector-service.loadbalancer.server.port=12345
healthcheck:
test: [ "CMD", "/lib/grafana-healthcheck.sh" ]
interval: 2s
timeout: 30s
retries: 15
depends_on:
jaeger:
condition: service_healthy
loki:
condition: service_healthy
prometheus:
condition: service_healthy
Same result:
example: using yaml file
docker-compose.yml
(using yaml file)
---
version: "3"
volumes:
# persistent volumes for storage backends which will remain across container lifetimes
jaeger-data: {} # storage of traces on-disk from jaeger
loki-data: {} # storage of log data on-disk from loki
prometheus-data: {} # storage of metrics data on-disk from prometheus
services:
# utility for exposing docker containers over local DNS
traefik:
image: traefik:v2.10
hostname: traefik
ports:
- "80:80"
- "8080:8080"
- "4317:4317"
- "4318:4318"
volumes:
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
- type: bind
source: ./traefik.yml
target: /etc/traefik/traefik.yml
restart: unless-stopped
healthcheck:
test: [ "CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:8080/" ]
interval: 1s
timeout: 2s
retries: 15
jaeger-volume:
image: alpine
command:
- chmod
- "0777"
- /jaeger
volumes:
- type: volume
source: jaeger-data
target: /jaeger
jaeger: # storage backend for trace data
image: jaegertracing/all-in-one:1
hostname: jaeger
environment:
# enable opentelemetry support
COLLECTOR_OTLP_ENABLED: true
# store traces on disk as opposed to memory, allowing persistence between container restarts
SPAN_STORAGE_TYPE: badger
BADGER_EPHEMERAL: false
BADGER_DIRECTORY_KEY: /jaeger/keys
BADGER_DIRECTORY_VALUE: /jaeger/values
volumes:
- type: volume
source: jaeger-data
target: /jaeger
ports:
- "4317" # grpc otlp
- "4318" # http otlp
- "16686" # HTTP frontend
labels:
- "traefik.enable=true"
- "traefik.http.routers.jaeger-router.rule=Host(`jaeger.localhost`)"
- "traefix.http.routers.jaeger-router.entrypoints=tcp80"
- "traefik.http.routers.jaeger-router.service=jaeger-service"
- "traefik.http.services.jaeger-service.loadbalancer.server.port=3000"
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:14269/"]
interval: 2s
timeout: 2s
retries: 15
depends_on:
jaeger-volume:
condition: service_completed_successfully
loki: # storage backend for log data
image: grafana/loki:2.9.2
hostname: loki
volumes:
- type: bind
source: ./local/etc/loki
target: /etc/loki
read_only: true
- type: volume
source: loki-data
target: /loki
ports:
- "3100"
healthcheck:
test: [ "CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:3100/ready" ]
interval: 2s
timeout: 2s
retries: 15
prometheus: # storage backend for metric data
image: prom/prometheus:v2.47.2
hostname: prometheus
command:
# enable pushing data into prometheus
- "--web.enable-remote-write-receiver"
# since we override the command, we need to re-specify the config file location
- "--config.file=/etc/prometheus/prometheus.yml"
volumes:
- type: bind
source: ./local/etc/prometheus
target: /etc/prometheus
read_only: true
- type: volume
source: prometheus-data
target: /prometheus
ports:
- "9090"
labels:
- traefik.enable=true
- traefik.http.routers.prom-router.rule=Host(`prometheus.localhost`)
- traefix.http.routers.prom-router.entrypoints=tcp80
- traefik.http.routers.prom-router.service=prom-service
- traefik.http.services.prom-service.loadbalancer.server.port=9090
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:9090/-/healthy"]
interval: 2s
timeout: 2s
retries: 15
grafana:
# log display
image: grafana/grafana:10.2.0
hostname: grafana
environment:
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_DISABLE_LOGIN_FORM: "true"
GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
volumes:
- type: bind
source: ./local/etc/grafana/provisioning/datasources
target: /etc/grafana/provisioning/datasources
read_only: true
ports:
- "3000"
labels:
- traefik.enable=true
- traefik.http.routers.grafana-router.rule=Host(`telemetry.localhost`)
- traefix.http.routers.grafana-router.entrypoints=tcp80
- traefik.http.routers.grafana-router.service=grafana-service
- traefik.http.services.grafana-service.loadbalancer.server.port=3000
healthcheck:
test: ["CMD", "wget", "-o", "/dev/null", "-O", "/dev/null", "http://127.0.0.1:3000/api/health"]
interval: 2s
timeout: 2s
retries: 15
depends_on:
jaeger:
condition: service_healthy
loki:
condition: service_healthy
prometheus:
condition: service_healthy
collector:
image: grafana/agent:latest
command:
- "run"
- "--server.http.listen-addr=0.0.0.0:12345"
- "/etc/grafana/agent/config.river"
environment:
AGENT_MODE: flow
CONFIG_FILE_PATH: /etc/grafana/agent/config.river
volumes:
- type: bind
source: ./local/etc/grafana/agent
target: /etc/grafana/agent
read_only: true
- type: bind
source: ./local/lib/grafana/agent/healthcheck.sh
target: /lib/grafana-healthcheck.sh
read_only: true
ports:
- "4317" # grpc otlp
- "4318" # http otlp
- "12345" # web console
labels:
- traefik.enable=true
# :4317 gRPC (TCP forwarding)
- traefik.tcp.routers.otlp-grpc-router.rule=HostSNI(`*`)
- traefik.tcp.routers.otlp-grpc-router.priority=1000
- traefik.tcp.routers.otlp-grpc-router.entrypoints=otelgrpc
- traefik.tcp.routers.otlp-grpc-router.service=otlp-grpc-service
- traefik.tcp.services.otlp-grpc-service.loadbalancer.server.port=4317
# :4318 HTTP (TCP forwarding)
- traefik.tcp.routers.otlp-http-router.rule=HostSNI(`*`)
- traefik.tcp.routers.otlp-http-router.priority=1000
- traefik.tcp.routers.otlp-http-router.entrypoints=otelhttp
- traefik.tcp.routers.otlp-http-router.service=otlp-http-service
- traefik.tcp.services.otlp-http-service.loadbalancer.server.port=4318
# Dashboard (HTTP forwarding)
- traefik.http.routers.collector-router.rule=Host(`collector.localhost`)
- traefix.http.routers.collector-router.entrypoints=tcp80
- traefik.http.routers.collector-router.service=collector-service
- traefik.http.services.collector-service.loadbalancer.server.port=12345
healthcheck:
test: [ "CMD", "/lib/grafana-healthcheck.sh" ]
interval: 2s
timeout: 30s
retries: 15
depends_on:
jaeger:
condition: service_healthy
loki:
condition: service_healthy
prometheus:
condition: service_healthy
traefik.yml
api:
insecure: true
providers:
docker:
exposedByDefault: false
entryPoints:
tcp80:
address: ":80"
otelgrpc:
address: ":4317/tcp"
otelhttp:
address: ":4318/tcp"
Same result:
NOTE: since I'm a "new user" I can't put more than two screenshots in this post. However, the dashboard looks exactly the same in this case.
I'm pretty sure I have tried every possible way of configuring it, and it's still associating the TCP listeners with services that shouldn't have them.
Is this a bug or am I missing something?
Why are all these services, even though they only specify the tcp80 endpoint, get assigned both OpenTelemetry endpoints as well?