Dear Traefik Community,
I have a problem with TCP in my project.
Intro
The project is a DNS server (running on my Raspberry Pi) built from 5 Docker containers: traefik, a DoH server, pihole, unbound and, currently, nginx, but the latter only because of the TCP problems.
Normal DNS requests (port 53, without traefik) and DoH (DNS over HTTPS | port 443, TLS terminated, then sent to the DoH server) work perfectly fine, and so do the dashboards.
You can find the corresponding configs for the working setup below. The whole config is open source on GitHub (link in the first sentence).
Certificates
My certificates are all self-signed by my own CA certificate, which is installed on my machines, so there are no problems with certificates.
Traefik configs of working construct
traefik.toml
[global]
checkNewVersion = true
sendAnonymousUsage = false
[api]
dashboard = true
insecure = false
debug = false
[ping]
entryPoint = "http"
[log]
level = "INFO" #DEBUG, INFO, WARN, ERROR, FATAL, PANIC
format = "common" #json
[accessLog]
format = "common" #json
[entryPoints]
[entryPoints.http]
address = ":80"
[entryPoints.https]
address = ":443"
[entryPoints.dot]
address = ":853"
[providers]
[providers.docker]
watch = true
endpoint = "unix:///var/run/docker.sock"
exposedByDefault = false
[providers.file]
watch = true
directory = "/etc/traefik/traefik.conf.d/"
https_redirect.toml
[http.routers]
[http.routers.rou_GlobalHttps]
entryPoints = ["http"]
rule = "HostRegexp(`{host:.+}`)"
priority = 1
middlewares = ["mdw_HttpsRedirect"]
service = "svc_DummyService"
[http.middlewares]
[http.middlewares.mdw_HttpsRedirect.redirectscheme]
permanent = true
scheme = "https"
[http.services]
[http.services.svc_DummyService.loadBalancer]
[[http.services.svc_DummyService.loadBalancer.servers]]
url = "dummy.service"
middleware.toml
[http.middlewares]
[http.middlewares.mdw_SecureHeaders.headers]
sslRedirect=true
forceSTSHeader=true
STSIncludeSubdomains=true
STSSeconds=63072000
STSPreload=true
browserXSSFilter=true
contentTypeNosniff=true
frameDeny=true
tls.toml
[tls.options]
[tls.options.default]
minVersion = "VersionTLS13"
cipherSuites = [
"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",
"TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
]
[[tls.certificates]]
certFile = "/etc/ssl/certs/cert.crt"
keyFile = "/etc/ssl/private/key.key"
docker-compose
The variables are automatically set via a start script. The compose stuff is currently split among 3 files: 1 base file, 1 for traefik (+ nginx) and 1 for nginx
docker-compose.yaml without doh_server and unbound parts
version: '3.5'
services:
# pihole container
pihole:
container_name: pihole
hostname: ${HOST_NAME}
depends_on:
- unbound
image: pihole/pihole:latest
environment:
- TZ=${TIMEZONE:-Europe/London}
- ServerIP=${HOST_IP}
- DNS1=172.16.1.5#53
- DNS2=no
- DOMAIN=${DOMAIN}
- HOST_IP=${HOST_IP}
volumes:
- ./pihole-docker/resolv.conf:/etc/resolv.conf
- ./pihole-docker/configs/pihole/:/etc/pihole/
- ./pihole-docker/configs/dnsmasq.d/dnsmasq.conf:/etc/dnsmasq.d/02-custom.conf
- ./pihole-docker/01-conf-dnsmasq.sh:/etc/cont-init.d/01-conf-dnsmasq.sh
ports:
- "53:53/tcp"
- "53:53/udp"
expose:
- "80"
networks:
dns_network0:
ipv4_address: 172.16.1.4
dns:
- 127.0.0.1
restart: always
labels:
- traefik.enable=true
- traefik.docker.network=${TRAEFIK_NETWORK:-traefik_proxy}
##### http
### services
# backend port
- traefik.http.services.svc_PiholeGui.loadbalancer.server.port=80
### middleware
# redirecting pi.hole
- traefik.http.middlewares.mdw_RedirectPihole.redirectregex.permanent=true
- traefik.http.middlewares.mdw_RedirectPihole.redirectregex.regex=^.*pi\.hole(.*)
- traefik.http.middlewares.mdw_RedirectPihole.redirectregex.replacement=https://pihole.${DOMAIN}$$1
# make sure `/admin` is there
- traefik.http.middlewares.mdw_AddAdminPath.replacepathregex.regex=^/((?i:(admin)/{0,1}|.{0})(.*))
- traefik.http.middlewares.mdw_AddAdminPath.replacepathregex.replacement=/admin/$$3
# pihole chain
- traefik.http.middlewares.mdw_PiholeChain.chain.middlewares=mdw_RedirectPihole,mdw_AddAdminPath,mdw_SecureHeaders@file
### routers
# pihole dashboard
- traefik.http.routers.rou_PiholeGui.entrypoints=https
- traefik.http.routers.rou_PiholeGui.rule=Host(`pihole.${DOMAIN}`,`pi.hole`)
- traefik.http.routers.rou_PiholeGui.tls=true
- traefik.http.routers.rou_PiholeGui.tls.options=default
- traefik.http.routers.rou_PiholeGui.middlewares=mdw_PiholeChain
- traefik.http.routers.rou_PiholeGui.service=svc_PiholeGui
networks:
# Bridge network for internal communication
dns_network0:
name: dns_network0
driver: bridge
driver_opts:
encrypted: "true"
ipam:
config:
- subnet: 172.16.1.0/24
attachable: false
docker-compose.traefik.yaml without doh_server part
version: '3.5'
services:
# pihole container
pihole:
networks:
traefik_proxy:
ipv4_address: 172.16.0.4
# nginx container
nginx:
container_name: nginx
hostname: ${HOST_NAME}
image: nginx:1.17
environment:
- DOMAIN=${DOMAIN}
volumes:
- ./nginx-docker/configs/:/etc/nginx/
- ./nginx-docker/templates/:/etc/nginx/templates/
- ./certificates/cert.crt:/etc/ssl/certs/cert.crt
- ./certificates/key.key:/etc/ssl/private/key.key
- ./certificates/dhparam.pem:/etc/nginx/dhparam.pem
expose:
- 853
networks:
dns_network0:
ipv4_address: 172.16.1.2
traefik_proxy:
ipv4_address: 172.16.0.2
restart: always
command: >-
/bin/bash -c
"envsubst < /etc/nginx/templates/443.conf.template > /etc/nginx/sites-enabled/443.conf &&
nginx -g 'daemon off;'"
labels:
- traefik.enable=true
- traefik.docker.network=traefik_proxy
##### tcp
### services
# backend port
- traefik.tcp.services.svc_NginxDot.loadbalancer.server.port=853
### routers
# DoT forward
- traefik.tcp.routers.rou_NginxDot.entrypoints=dot
- traefik.tcp.routers.rou_NginxDot.rule=HostSNI(`dot.${DOMAIN}`)
- traefik.tcp.routers.rou_NginxDot.tls.passthrough=true
- traefik.tcp.routers.rou_NginxDot.service=svc_NginxDot
# træfik container
traefik:
container_name: traefik
hostname: ${HOST_NAME}
image: traefik:v2.0
environment:
- TZ=${TIMEZONE:-Europe/London}
volumes:
- /etc/localtime:/etc/localtime:ro
- /var/run/docker.sock:/var/run/docker.sock
- ./traefik-docker/configs/:/etc/traefik/
- ./traefik-docker/shared/:/shared/:ro
- ./certificates/cert.crt:/etc/ssl/certs/cert.crt
- ./certificates/key.key:/etc/ssl/private/key.key
ports:
- "80:80"
- "443:443"
- "853:853"
- "8080:8080"
networks:
traefik_proxy:
ipv4_address: 172.16.0.250
restart: always
labels:
- traefik.enable=true
- traefik.docker.network=traefik_proxy
##### https
### middleware
# dashboard auth
- traefik.http.middlewares.mdw_TraefikAuth.basicauth.usersfile=/shared/.htpasswd
# dashboard chain
- traefik.http.middlewares.mdw_TraefikChainNoAuth.chain.middlewares=mdw_SecureHeaders@file
- traefik.http.middlewares.mdw_TraefikChainAuth.chain.middlewares=mdw_SecureHeaders@file,mdw_TraefikAuth
### routers
# traefik dashboard
- traefik.http.routers.rou_Traefik.entrypoints=https
- traefik.http.routers.rou_Traefik.rule=Host(`traefik.${DOMAIN}`)
- traefik.http.routers.rou_Traefik.tls=true
- traefik.http.routers.rou_Traefik.tls.options=default
- traefik.http.routers.rou_Traefik.middlewares=mdw_TraefikChain${TRAEFIK_AUTH:-NoAuth}
- traefik.http.routers.rou_Traefik.service=api@internal
networks:
# Bridge network for træfik's communication
traefik_proxy:
name: traefik_proxy
driver: bridge
driver_opts:
encrypted: "true"
ipam:
config:
- subnet: 172.16.0.0/24
attachable: false
Problem
DoT (DNS over TLS), on the other hand, is where the problem lies. It does work when I pass the TLS-encrypted TCP traffic through to nginx and let nginx, after TLS termination, forward the TCP stream to pihole -> unbound (see config above).
I can confirm this by looking at the clients on the pihole dashboard.
But when I let traefik handle the TCP stream itself, with or without TLS, it does not work anymore: neither sending the (TLS-terminated) TCP stream to pihole nor sending it to unbound directly. None of the four setups works.
Findings
Here are the IPs and corresponding parts:
- 192.168.178.206 - test machine
- 192.168.178.221 - dns server (RasPi)
- 172.16.0.250 - traefik docker
- 172.16.0.4 - pihole docker
- 172.16.0.5 - unbound docker
And here are my findings:
The logs from the test machine's unbound are very long and verbose, so I only show a few excerpts.
TLS: on; target: pihole
Unbound:
- "SSL connection to *.guenther.dns authenticated ip4 192.168.178.221 port 853 (len 16)"
- "tcp error for address ip4 192.168.178.221 port 853 (len 16)"
Traefik shows these three lines multiple times, with different ports:
- time="2019-11-18T21:32:53+01:00" level=debug msg="Handling connection from 192.168.178.206:50914"
- time="2019-11-18T21:32:53+01:00" level=debug msg="Error while terminating connection: tls: CloseWrite called before handshake complete"
- time="2019-11-18T21:32:56+01:00" level=debug msg="Error while terminating connection: close tcp 172.16.0.250:35404->172.16.0.4:53: shutdown: transport endpoint is not connected"
The middle line only showed up after some testing. At first there were only the first and third lines in the logs. But the test machine's unbound still authenticated the connection (see above).
TLS: off; target: pihole
Unbound:
- tcp error for address ip4 192.168.178.221 port 853 (len 16)
Traefik shows these two lines multiple times, with different ports:
- time="2019-11-18T21:39:30+01:00" level=debug msg="Handling connection from 192.168.178.206:50962"
- time="2019-11-18T21:39:30+01:00" level=debug msg="Error while terminating connection: close tcp 172.16.0.250:35474->172.16.0.4:53: shutdown: transport endpoint is not connected"
TLS: on; target: unbound
Unbound:
- strangely no ssl/tls authentication entry like with pihole as target
- "tcp error for address ip4 192.168.178.221 port 853 (len 16)"
Traefik shows these two lines multiple times, with different ports, but the second line only appears after some time:
- time="2019-11-18T21:14:21+01:00" level=debug msg="Handling connection from 192.168.178.206:50828"
- time="2019-11-18T21:15:27+01:00" level=error msg="Error while connection to backend: dial tcp 172.16.1.5:53: connect: connection timed out"
TLS: off; target: unbound
Unbound:
- "tcp error for address ip4 192.168.178.221 port 853 (len 16)"
Traefik shows these two lines multiple times, with different ports, but the second line only appears after some time:
- time="2019-11-18T21:28:36+01:00" level=debug msg="Handling connection from 192.168.178.206:50910"
- time="2019-11-18T21:30:07+01:00" level=error msg="Error while connection to backend: dial tcp 172.16.1.5:53: connect: connection timed out"
Changed configs
The nginx container is stopped and the pihole labels are supplemented with the TCP config. For testing unbound, the labels are added to unbound instead.
labels for tcp + tls added to pihole
##### tcp
### services
# backend port
- traefik.tcp.services.svc_PiholeDns.loadbalancer.server.port=53
### routers
# DoT forward
- traefik.tcp.routers.rou_PiholeDot.entrypoints=dot
- traefik.tcp.routers.rou_PiholeDot.rule=HostSNI(`dot.${DOMAIN}`)
- traefik.tcp.routers.rou_PiholeDot.tls=true
- traefik.tcp.routers.rou_PiholeDot.tls.options=default
- traefik.tcp.routers.rou_PiholeDot.service=svc_PiholeDns
labels for tcp without tls added to pihole
##### tcp
### services
# backend port
- traefik.tcp.services.svc_PiholeDns.loadbalancer.server.port=53
### routers
# DoT forward
- traefik.tcp.routers.rou_PiholeDot.entrypoints=dot
- traefik.tcp.routers.rou_PiholeDot.rule=HostSNI(`*`)
- traefik.tcp.routers.rou_PiholeDot.service=svc_PiholeDns
Testing machine
For testing I use another unbound instance running on my linux mint vm which sends the dns requests via TCP (with and without TLS) to the RasPi-DNS-Server.
unbound config with tls test machine
#Adding DNS-Over-TLS support
server:
logfile: ""
log-time-ascii: yes
log-queries: yes
log-replies: yes
log-tag-queryreply: yes
verbosity: 5
do-udp: yes
do-tcp: yes
tcp-upstream: yes
tls-cert-bundle: /etc/ssl/certs/ca-certificates.crt
forward-zone:
name: "."
forward-ssl-upstream: yes
forward-no-cache: yes
forward-addr: 192.168.178.221@853#dot.guenther.dns
# For testing if setup works
# forward-addr: 9.9.9.9@853#quad9.net
unbound config without tls test machine
#Adding DNS-Over-TLS support
server:
logfile: ""
log-time-ascii: yes
log-queries: yes
log-replies: yes
log-tag-queryreply: yes
verbosity: 5
do-udp: yes
do-tcp: yes
tcp-upstream: yes
tls-cert-bundle: /etc/ssl/certs/ca-certificates.crt
forward-zone:
name: "."
forward-ssl-upstream: no
forward-no-cache: yes
forward-addr: 192.168.178.221@853#dot.guenther.dns
# For testing if setup works
# forward-addr: 9.9.9.9@853#quad9.net
Afterword
The strange thing is that it works with nginx just fine but not with traefik.
I hope you can help me migrate fully from nginx to traefik.
Looking forward to hearing your ideas. If you need more information, I will do my best to provide it.
Thanks in advance,
Cielquan
EDIT: During the unbound tests, unbound was not in the traefik network, so those tests are invalid.