Service Not Discovered After It's Stopped and Restarted for Backups

Hi,

I'm using this repo to deploy a SPA web app: https://github.com/tiangolo/full-stack-fastapi-postgresql

It uses a Traefix proxy for load balancing and the deployment is based on these instructions: https://dockerswarm.rocks/

The deployment works correctly, but when I use docker-volume-backup to backup my Docker volumes several containers are automatically stopped and restarted whilst the backup takes place (to prevent issues with data integrity).

When the containers are restarted the db service is no longer discoverable by the other services, which prevents the app from functioning.

I've attached the docker_compose.yml for reference.

I've tried adding the proxy to the services to be stopped and restarted but that didn't seem to fix the problem.

Any advice would be greatly appreciated.

Thank you!

version: "3.3"
services:

  proxy:
    image: traefik:v2.2
    networks:
      - ${TRAEFIK_PUBLIC_NETWORK?Variable not set}
      - default
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    command:
      # Enable Docker in Traefik, so that it reads labels from Docker services
      - --providers.docker
      # Add a constraint to only use services with the label for this stack
      # from the env var TRAEFIK_TAG
      - --providers.docker.constraints=Label(`traefik.constraint-label-stack`, `${TRAEFIK_TAG?Variable not set}`)
      # Do not expose all Docker services, only the ones explicitly exposed
      - --providers.docker.exposedbydefault=false
      # Enable Docker Swarm mode
      - --providers.docker.swarmmode
      # Enable the access log, with HTTP requests
      - --accesslog
      # Enable the Traefik log, for configurations and errors
      - --log
      # Enable the Dashboard and API
      - --api
    deploy:
      placement:
        constraints:
          - node.role == manager
      labels:
        # Enable Traefik for this service, to make it available in the public network
        - traefik.enable=true
        # Use the traefik-public network (declared below)
        - traefik.docker.network=${TRAEFIK_PUBLIC_NETWORK?Variable not set}
        # Use the custom label "traefik.constraint-label=traefik-public"
        # This public Traefik will only use services with this label
        - traefik.constraint-label=${TRAEFIK_PUBLIC_TAG?Variable not set}
        # traefik-http set up only to use the middleware to redirect to https
        - traefik.http.middlewares.${STACK_NAME?Variable not set}-https-redirect.redirectscheme.scheme=https
        - traefik.http.middlewares.${STACK_NAME?Variable not set}-https-redirect.redirectscheme.permanent=true
        # Handle host with and without "www" to redirect to only one of them
        # Uses environment variable DOMAIN
        # To disable www redirection remove the Host() you want to discard, here and
        # below for HTTPS
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-http.rule=Host(`${DOMAIN?Variable not set}`)
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-http.entrypoints=http
        # traefik-https the actual router using HTTPS
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-https.rule=Host(`${DOMAIN?Variable not set}`)
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-https.entrypoints=https
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-https.tls=true
        # Use the "le" (Let's Encrypt) resolver created below
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-https.tls.certresolver=le
        # Define the port inside of the Docker service to use
        - traefik.http.services.${STACK_NAME?Variable not set}-proxy.loadbalancer.server.port=80
        # Handle domain with and without "www" to redirect to only one
        # To disable www redirection remove the next line
        # traefik.http.middlewares.${STACK_NAME?Variable not set}-www-redirect.redirectregex.regex=^https?://(www.)?(${DOMAIN?Variable not set})/(.*)
        # Redirect a domain with www to non-www
        # To disable it remove the next line
        - traefik.http.middlewares.${STACK_NAME?Variable not set}-www-redirect.redirectregex.replacement=https://${DOMAIN?Variable not set}/$${3}
        # Redirect a domain without www to www
        # To enable it remove the previous line and uncomment the next
        # - traefik.http.middlewares.${STACK_NAME}-www-redirect.redirectregex.replacement=https://www.${DOMAIN}/$${3}
        # Middleware to redirect www, to disable it remove the next line
        # traefik.http.routers.${STACK_NAME?Variable not set}-proxy-https.middlewares=${STACK_NAME?Variable not set}-www-redirect
        # Middleware to redirect www, and redirect HTTP to HTTPS
        # to disable www redirection remove the section: ${STACK_NAME?Variable not set}-www-redirect,
        - traefik.http.routers.${STACK_NAME?Variable not set}-proxy-http.middlewares=${STACK_NAME?Variable not set}-https-redirect
      restart_policy:
        condition: on-failure

  db:
    image: postgres:12
    volumes:
      - app-db-data:/var/lib/postgresql/data/pgdata
    env_file:
      - .env
    environment:
      - PGDATA=/var/lib/postgresql/data/pgdata
    deploy:
      placement:
        constraints:
          - node.labels.${STACK_NAME?Variable not set}.app-db-data == true
      restart_policy:
        condition: on-failure
    labels:
      # Adding this label means this container should be stopped while it's being backed up:
      - docker-volume-backup.stop-during-backup=true


  pgadmin:
    image: dpage/pgadmin4
    networks:
      - ${TRAEFIK_PUBLIC_NETWORK?Variable not set}
      - default
    depends_on:
      - db
    env_file:
      - .env
    deploy:
      labels:
        - traefik.enable=true
        - traefik.docker.network=${TRAEFIK_PUBLIC_NETWORK?Variable not set}
        - traefik.constraint-label=${TRAEFIK_PUBLIC_TAG?Variable not set}
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-http.rule=Host(`pgadmin.${DOMAIN?Variable not set}`)
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-http.entrypoints=http
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-http.middlewares=${STACK_NAME?Variable not set}-https-redirect
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-https.rule=Host(`pgadmin.${DOMAIN?Variable not set}`)
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-https.entrypoints=https
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-https.tls=true
        - traefik.http.routers.${STACK_NAME?Variable not set}-pgadmin-https.tls.certresolver=le
        - traefik.http.services.${STACK_NAME?Variable not set}-pgadmin.loadbalancer.server.port=5050
      restart_policy:
        condition: on-failure
    labels:
      # Adding this label means this container should be stopped while it's being backed up:
      - docker-volume-backup.stop-during-backup=true

  queue:
    image: rabbitmq:3
    # Using the below image instead is required to enable the "Broker" tab in the flower UI:
    # image: rabbitmq:3-management
    #
    # You also have to change the flower command

  flower:
    image: mher/flower
    networks:
      - ${TRAEFIK_PUBLIC_NETWORK?Variable not set}
      - default
    env_file:
      - .env
    command:
      - "--broker=amqp://guest@queue:5672//"
      # For the "Broker" tab to work in the flower UI, uncomment the following command argument,
      # and change the queue service's image as well
      # - "--broker_api=http://guest:guest@queue:15672/api//"
    deploy:
      labels:
        - traefik.enable=true
        - traefik.docker.network=${TRAEFIK_PUBLIC_NETWORK?Variable not set}
        - traefik.constraint-label=${TRAEFIK_PUBLIC_TAG?Variable not set}
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-http.rule=Host(`flower.${DOMAIN?Variable not set}`)
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-http.entrypoints=http
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-http.middlewares=${STACK_NAME?Variable not set}-https-redirect
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-https.rule=Host(`flower.${DOMAIN?Variable not set}`)
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-https.entrypoints=https
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-https.tls=true
        - traefik.http.routers.${STACK_NAME?Variable not set}-flower-https.tls.certresolver=le
        - traefik.http.services.${STACK_NAME?Variable not set}-flower.loadbalancer.server.port=5555
      restart_policy:
        condition: on-failure

  backend:
    image: '${DOCKER_IMAGE_BACKEND?Variable not set}:${TAG-latest}'
    depends_on:
      - db
    volumes:
      - app-upload-data:/app/app/uploads
    env_file:
      - .env
    environment:
      - SERVER_NAME=${DOMAIN?Variable not set}
      - SERVER_HOST=https://${DOMAIN?Variable not set}
      # Allow explicit env var override for tests
      - SMTP_HOST=${SMTP_HOST}
    build:
      context: ./backend
      dockerfile: backend.dockerfile
      args:
        INSTALL_DEV: ${INSTALL_DEV-false}
    deploy:
      labels:
        - traefik.enable=true
        - traefik.constraint-label-stack=${TRAEFIK_TAG?Variable not set}
        - traefik.http.routers.${STACK_NAME?Variable not set}-backend-http.rule=PathPrefix(`/api`) || PathPrefix(`/docs`) || PathPrefix(`/redoc`)
        - traefik.http.services.${STACK_NAME?Variable not set}-backend.loadbalancer.server.port=80
      placement:
        constraints:
          - node.labels.${STACK_NAME?Variable not set}.app-upload-data == true
      restart_policy:
        condition: on-failure
    labels:
      # Adding this label means this container should be stopped while it's being backed up:
      - docker-volume-backup.stop-during-backup=true

  celeryworker:
    image: '${DOCKER_IMAGE_CELERYWORKER?Variable not set}:${TAG-latest}'
    depends_on:
      - db
      - queue
    volumes:
      - app-upload-data:/app/app/uploads
    env_file:
      - .env
    environment:
      - SERVER_NAME=${DOMAIN?Variable not set}
      - SERVER_HOST=https://${DOMAIN?Variable not set}
      # Allow explicit env var override for tests
      - SMTP_HOST=${SMTP_HOST?Variable not set}
    build:
      context: ./backend
      dockerfile: celeryworker.dockerfile
      args:
        INSTALL_DEV: ${INSTALL_DEV-false}
    deploy:
      placement:
        constraints:
          - node.labels.${STACK_NAME?Variable not set}.app-upload-data == true
      restart_policy:
        condition: on-failure
    labels:
      # Adding this label means this container should be stopped while it's being backed up:
      - docker-volume-backup.stop-during-backup=true

  frontend:
    image: '${DOCKER_IMAGE_FRONTEND?Variable not set}:${TAG-latest}'
    build:
      context: ./frontend
      args:
        FRONTEND_ENV: ${FRONTEND_ENV-production}
    deploy:
      labels:
        - traefik.enable=true
        - traefik.constraint-label-stack=${TRAEFIK_TAG?Variable not set}
        - traefik.http.routers.${STACK_NAME?Variable not set}-frontend-http.rule=PathPrefix(`/`)
        - traefik.http.services.${STACK_NAME?Variable not set}-frontend.loadbalancer.server.port=80

  backup:
    image: futurice/docker-volume-backup
    environment:
      BACKUP_CRON_EXPRESSION: "#"
      BACKUP_FILENAME: latest.tar.gz
      AWS_S3_BUCKET_NAME: surge-analytics-${TAG?Variable not set}-backups
      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID?Variable not set}
      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY?Variable not set}
    volumes:
      # Allow use of the "stop-during-backup" feature
      - /var/run/docker.sock:/var/run/docker.sock:ro
      # Mount the app db data volume (as read-only)
      - app-db-data:/backup/app-db-data:ro
      # Mount the appp upload data volume (as read-only)
      - app-upload-data:/backup/app-upload-data:ro

volumes:
  app-db-data:
  app-upload-data:

networks:
  traefik-public:
    # Allow setting it to false for testing
    external: ${TRAEFIK_PUBLIC_NETWORK_IS_EXTERNAL-true}