SSL certificate not being generated for services in docker swarm

hi folks,

I have other problems with Traefik related to basic auth; I hope this issue is not related to them.
I have set up Traefik so that when a new service is deployed on the swarm, it should have SSL fully provisioned via Let's Encrypt (ACME).

Traefik setup

version: "3"

# The overlay network is created outside this stack and shared with the
# services that Traefik routes to.
networks:
  public:
    external: true

# Named volume that keeps acme.json across container restarts.
volumes:
  traefik-certificates:

services:
  traefik:
    image: traefik:v2.3
    networks:
      - public
    ports:
      # "web" entrypoint (HTTP)
      - '80:80'
      # Dashboard port; only relevant when --api.insecure=true
      # - '8080:8080'
      # "websecure" entrypoint (HTTPS)
      - '443:443'
    volumes:
      # Certificate storage (see acme.storage below)
      - traefik-certificates:/letsencrypt
      # Read-only Docker socket so Traefik can watch Docker events
      - /var/run/docker.sock:/var/run/docker.sock:ro
    command:
      - '--log.level=DEBUG'
      - '--global.sendAnonymousUsage=true'
      # Docker provider in swarm mode; services must opt in with
      # traefik.enable=true because exposedbydefault is off
      - '--providers.docker'
      - '--providers.docker.endpoint=unix:///var/run/docker.sock'
      - '--providers.docker.swarmMode=true'
      - '--providers.docker.exposedbydefault=false'
      - '--providers.docker.network=public'
      # Entrypoints
      - '--entrypoints.websecure.address=:443'
      - '--entrypoints.web.address=:80'
      # ACME resolver "letsencryptresolver" using the HTTP challenge on "web"
      - '--certificatesresolvers.letsencryptresolver.acme.httpchallenge=true'
      - '--certificatesresolvers.letsencryptresolver.acme.httpchallenge.entrypoint=web'
      - '--certificatesresolvers.letsencryptresolver.acme.email=${LETS_ENCRYPT_EMAIL}'
      - '--certificatesresolvers.letsencryptresolver.acme.storage=/letsencrypt/acme.json'
      # Redirect everything arriving on "web" to "websecure" over https
      - '--entrypoints.web.http.redirections.entryPoint.to=websecure'
      - '--entrypoints.web.http.redirections.entryPoint.scheme=https'
      # Dashboard/API, not exposed on the insecure port
      - '--api.dashboard=true'
      - '--api.debug=true'
      - '--api.insecure=false'
    deploy:
      placement:
        constraints: [node.role == manager]
      labels:
        - 'traefik.enable=true'
        # Swarm requires a port on the service definition; it is not used
        # because the router below targets api@internal.
        - 'traefik.http.services.traefik.loadbalancer.server.port=888'
        - 'traefik.http.routers.traefik.rule=Host(`${TRAEFIK_URL}`) '
        - 'traefik.http.routers.traefik.entrypoints=websecure'
        - 'traefik.http.routers.traefik.tls.certresolver=letsencryptresolver'
        - 'traefik.http.routers.traefik.service=api@internal'
        # Protect the dashboard with basic auth
        - 'traefik.http.routers.traefik.middlewares=traefik-auth'
        - 'traefik.http.middlewares.traefik-auth.basicauth.users=${TRAEFIK_BASIC_AUTH_USERS}'

Then I deploy Portainer with the following stack file:

# Prerequisites: create the two overlay networks once before deploying
# this stack.
#
#   docker network create -d overlay public
#   docker network create -d overlay agent_network

version: '3'
services:
  agent:
    image: portainer/agent
    environment:
      # REQUIRED: Should be equal to the service name prefixed by "tasks." when
      # deployed inside an overlay network
      AGENT_CLUSTER_ADDR: tasks.agent
      # AGENT_PORT: 9001
      # LOG_LEVEL: debug
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - /var/lib/docker/volumes:/var/lib/docker/volumes
    networks:
      - agent_network
    deploy:
      # One agent task per node
      mode: global
      placement:
        constraints: [node.platform.os == linux]

  portainer:
    image: portainer/portainer-ce:2.0.0
    command: -H tcp://tasks.agent:9001 --tlsskipverify
    volumes:
      - data:/data
    networks:
      - public
      - agent_network
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints: [node.role == manager]
      labels:
        - "traefik.enable=true"
        # Portainer UI
        - "traefik.http.routers.portainer.rule=Host(`${PORTAINER_URL}`)"
        - "traefik.http.routers.portainer.entrypoints=websecure"
        # Each router needs its own TLS section. Without it the router has no
        # certificate resolver and Traefik answers HTTPS requests for this
        # host with its default certificate. The resolver name must match the
        # one declared with --certificatesresolvers.<name> on the Traefik
        # service.
        - "traefik.http.routers.portainer.tls.certresolver=letsencryptresolver"
        - "traefik.http.services.portainer.loadbalancer.server.port=9000"
        - "traefik.http.routers.portainer.service=portainer"
        # Edge
        - "traefik.http.routers.edge.rule=Host(`${PORTAINER_EDGE_URL}`)"
        - "traefik.http.routers.edge.entrypoints=websecure"
        # Same as above: request a certificate for the edge host as well.
        - "traefik.http.routers.edge.tls.certresolver=letsencryptresolver"
        - "traefik.http.services.edge.loadbalancer.server.port=8000"
        - "traefik.http.routers.edge.service=edge"

networks:
  public:
    external: true
  agent_network:
    external: true

volumes:
  data:

When I visit the Portainer domain, I get the following errors in the Traefik log:

traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:13Z" level=debug msg="Serving default certificate for request: \"portainer202101.domain.space\""
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:13Z" level=debug msg="Serving default certificate for request: \"portainer202101.domain.space\""
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:13Z" level=debug msg="http: TLS handshake error from 10.0.0.2:36920: remote error: tls: unknown certificate"
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:13Z" level=debug msg="http: TLS handshake error from 10.0.0.2:36922: remote error: tls: unknown certificate"
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:15Z" level=debug msg="Filtering disabled container" providerName=docker container=portainer-agent-knvvg1g9t1d206l2d9kt4ir5m
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:15Z" level=debug msg="Filtering disabled container" providerName=docker container=portainer-agent-lyfzpy2frq2qdd8sqfuuwsf8k
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:15Z" level=debug msg="Filtering disabled container" providerName=docker container=portainer-agent-rl1dr7v9py7uvmt616psd5721
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:15Z" level=debug msg="Configuration received from provider docker: {\"http\":{\"routers\":{\"edge\":{\"entryPoints\":[\"websecure\"],\"service\":\"edge\",\"rule\":\"Host(`edge202101.domain.space`)\"},\"portainer\":{\"entryPoints\":[\"websecure\"],\"service\":\"portainer\",\"rule\":\"Host(`portainer202101.domain.space`)\"},\"traefik\":{\"entryPoints\":[\"websecure\"],\"middlewares\":[\"traefik-auth\"],\"service\":\"api@internal\",\"rule\":\"Host(`traefik202101.domain.space`) \",\"tls\":{\"certResolver\":\"letsencryptresolver\"}}},\"services\":{\"edge\":{\"loadBalancer\":{\"servers\":[{\"url\":\"http://10.0.1.14:8000\"}],\"passHostHeader\":true}},\"portainer\":{\"loadBalancer\":{\"servers\":[{\"url\":\"http://10.0.1.14:9000\"}],\"passHostHeader\":true}},\"traefik\":{\"loadBalancer\":{\"servers\":[{\"url\":\"http://10.0.1.3:888\"}],\"passHostHeader\":true}}},\"middlewares\":{\"traefik-auth\":{\"basicAuth\":{\"users\":[\"admin:31235apr131235kfY9ggNB31235DMuIKOae8f6VBU36bMCyW0\"]}}}},\"tcp\":{},\"udp\":{}}" providerName=docker
traefik_traefik.1.oook13dgppd6@myApp-infra-swarm-manager-0    | time="2021-02-02T12:05:15Z" level=info msg="Skipping same configuration" providerName=docker

Again, I am not sure why it is failing, as I cannot see any difference between the manual/docs and my implementation. Any clues?

I recommend removing these lines and using the default TLS-ALPN-01 challenge instead. The HTTP-to-HTTPS redirects are occurring before the HTTP challenge router is reached.

You may be able to set --certificatesresolvers.letsencryptresolver.acme.httpchallenge.entrypoint=websecure. I have not tried this but the LE resolver is meant to follow redirects.

I have tried a few combinations; the online documentation is a bit confusing, in my limited opinion.

For example, the "Let's Encrypt - Traefik" documentation page

uses the SSL configuration with httpchallenge = true.

Then, in the router labels, it mentions routers.blog.tls=true, i.e. it uses tls instead of httpchallenge.

With the new attempts I see:

"Adding route for traefik202101.domain.space with TLS options default"
Looking for provided certificate(s) to validate [\"traefik202101.domain.space\"]
"No ACME certificate generation required for domains [\"traefik202101.domain.space\"]."
msg="Filtering disabled container" providerName=docker container=portainer-agent (i assume this is basically disabling the routing to the portainer service)

I am totally lost. I am not sure why it is not working, and even in the logs I cannot see anything that explicitly says SSL certificate setup or generation failed for a given domain.

Usually this means you already have a certificate issued.

Your service agent portainer/agent has no traefik.enable and you have specified "--providers.docker.exposedbydefault=false" so that service is filtered out.

As an aside, v2.4.2 has just been released; it fixes a bug where the ACME HTTP challenge was also being redirected from HTTP to HTTPS.

I just forced the image tag to 2.4.2, but it made no difference.

If I call the Portainer or edge domains, I see Traefik logging errors:

level=debug msg="http: TLS handshake error from 10.0.0.2:34850: remote error: tls: unknown certificate"

There are also lots of messages about receiving a new configuration that is identical to the current one and is therefore skipped:

time="2021-02-03T18:14:27Z" level=debug msg="Configuration received from provider docker: {\"http\":{\"routers\":{\"edge\":{\"entryPoints\":[\"websecure\"],\"service\":\"edge\",\"rule\":\"Host(`edge202101.domain.space`)\"},\"portainer\":{\"entryPoints\":[\"websecure\"],\"service\":\"portainer\",\"rule\":\"Host(`portainer202101.domain.space`)\"},\"traefik\":{\"entryPoints\":[\"websecure\"],\"middlewares\":[\"traefik-auth\"],\"service\":\"api@internal\",\"rule\":\"Host(`traefik202101.domain.space`) \",\"tls\":{\"certResolver\":\"letsencryptresolver\"}}},\"services\":{\"edge\":{\"loadBalancer\":{\"servers\":[{\"url\":\"http://10.0.2.10:8000\"}],\"passHostHeader\":true}},\"portainer\":{\"loadBalancer\":{\"servers\":[{\"url\":\"http://10.0.2.10:9000\"}],\"passHostHeader\":true}},\"traefik\":{\"loadBalancer\":{\"servers\":[{\"url\":\"http://10.0.2.5:888\"}],\"passHostHeader\":true}}},\"middlewares\":{\"traefik-auth\":{\"basicAuth\":{\"users\":[\"admin:$$apr1$$kfY9ggNB$$DMuIKOae8f6VBU36bMCyW0\"]}}}},\"tcp\":{},\"udp\":{}}" providerName=docker

Just to confirm, these are the certificatesresolvers CLI lines I now have in the Traefik container:

      # NOTE(review): "websecure" is bound to :443 in the Traefik command
      # shown earlier, whereas Let's Encrypt's HTTP-01 validation connects on
      # port 80. Confirm whether this line should be removed now that the
      # TLS challenge is enabled below.
      - "--certificatesresolvers.letsencryptresolver.acme.httpchallenge.entrypoint=websecure"
      # Enables the TLS challenge for this resolver.
      # NOTE(review): presumably only one challenge type is used per resolver;
      # verify which one Traefik picks when both are configured.
      - "--certificatesresolvers.letsencryptresolver.acme.tlschallenge=true"
      # Contact address passed to the ACME account; interpolated from the
      # environment at deploy time.
      - "--certificatesresolvers.letsencryptresolver.acme.email=${LETS_ENCRYPT_EMAIL}"
      # File in which the resolver stores its ACME data.
      - "--certificatesresolvers.letsencryptresolver.acme.storage=/letsencrypt/acme.json"

Are these correct?