
Invalid memory address crash when running a backup #403

Closed
lyze237 opened this issue Apr 15, 2024 · 16 comments · Fixed by #405
Labels
bug Something isn't working

Comments

@lyze237

lyze237 commented Apr 15, 2024

Hey!

I'm trying to set up the project with Docker Swarm, but whenever I run a backup I get a nil pointer dereference:

 time=2024-04-15T08:45:58.629Z level=INFO msg="Successfully scheduled backup from environment with expression 47 * * * *"
 time=2024-04-15T08:47:00.000Z level=INFO msg="Now running script on schedule 47 * * * *"
 time=2024-04-15T08:47:00.219Z level=ERROR msg="Unexpected error running schedule 47 * * * *: runtime error: invalid memory address or nil pointer dereference" error="main.runScript.func1: unexpected panic running script: runtime error: invalid memory address or nil pointer dereference"

stack ps:

ID             NAME                                        IMAGE                               NODE                DESIRED STATE   CURRENT STATE            ERROR     PORTS
cibr7veryup0   influx_backup.qfgvk3lkntbcstbbs7b6chb1y     offen/docker-volume-backup:latest   docker-management   Running         Running 10 minutes ago
vj9kj0oaxsyo   influx_influxdb.qfgvk3lkntbcstbbs7b6chb1y   influxdb:1.8                        docker-management   Running         Running 10 minutes ago

docker stack file:

version: '3.3'

services:
  backup:
    image: offen/docker-volume-backup:latest
    restart: always
    environment:
      BACKUP_RETENTION_DAYS: "7"
      BACKUP_CRON_EXPRESSION: "52 * * * *"
      BACKUP_PRUNING_LEEWAY: "5s"
      SSH_HOST_NAME: 192.168.122.186
      SSH_PORT: 22
      SSH_USER: lyze
      SSH_PASSWORD: pw
      SSH_REMOTE_PATH: /home/lyze/backups
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - influx-data:/backup/influx-data:ro
    deploy:
      mode: global
      placement:
        constraints:
          - node.role == manager

  influxdb:
    image: influxdb:1.8
    volumes:
      - influx-data:/var/lib/influxdb
    networks:
      - net
    deploy:
      labels:
        - docker-volume-backup.stop-during-backup=true
      resources:
        limits:
          cpus: '0.60'
          memory: 512M
        reservations:
          cpus: '0.30'
          memory: 128M
      mode: global
      placement:
        constraints:
          - node.role == manager

networks:
  net:
    driver: overlay

volumes:
  influx-data:
    driver: local

This also happens with the compose file from the tests folder in your repo (slightly adjusted to make sure that the backup container runs on the manager node):

sudo docker exec fa29d2fa55f0 backup
time=2024-04-15T08:59:03.382Z level=ERROR msg="Fatal error running command: runtime error: invalid memory address or nil pointer dereference" error="main.(*command).runAsCommand: error running script: main.runScript.func1: unexpected panic running script: runtime error: invalid memory address or nil pointer dereference"
ID             NAME                   IMAGE                                      NODE                DESIRED STATE   CURRENT STATE           ERROR     PORTS
md9x5l4f6iys   backup-test_backup.1   offen/docker-volume-backup:latest          docker-management   Running         Running 3 minutes ago
ccjk1hq5k1xt   backup-test_minio.1    minio/minio:RELEASE.2020-08-04T23-10-51Z   docker-default      Running         Running 3 minutes ago
a78w8tdg7jaj   backup-test_offen.1    offen/offen:latest                         docker-default      Running         Running 3 minutes ago
azeu0we83lb7   backup-test_offen.2    offen/offen:latest                         docker-vpn          Running         Running 3 minutes ago
wf2r8uoic7zs   backup-test_pg.1       postgres:14-alpine                         docker-default      Running         Running 3 minutes ago
version: '3.8'

services:
  minio:
    image: minio/minio:RELEASE.2020-08-04T23-10-51Z
    deploy:
      restart_policy:
        condition: on-failure
    environment:
      MINIO_ROOT_USER: test
      MINIO_ROOT_PASSWORD: test
      MINIO_ACCESS_KEY: test
      MINIO_SECRET_KEY: GMusLtUmILge2by+z890kQ
    entrypoint: /bin/ash -c 'mkdir -p /data/backup && minio server /data'
    volumes:
      - backup_data:/data

  backup:
    image: offen/docker-volume-backup:latest
    depends_on:
      - minio
    deploy:
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.role == manager
    environment:
      AWS_ACCESS_KEY_ID: test
      AWS_SECRET_ACCESS_KEY: GMusLtUmILge2by+z890kQ
      AWS_ENDPOINT: minio:9000
      AWS_ENDPOINT_PROTO: http
      AWS_S3_BUCKET_NAME: backup
      BACKUP_FILENAME: test.tar.gz
      BACKUP_CRON_EXPRESSION: 0 0 5 31 2 ?
      BACKUP_RETENTION_DAYS: 7
      BACKUP_PRUNING_LEEWAY: 5s
    volumes:
      - pg_data:/backup/pg_data:ro
      - /var/run/docker.sock:/var/run/docker.sock

  offen:
    image: offen/offen:latest
    labels:
      - docker-volume-backup.stop-during-backup=true
    healthcheck:
      disable: true
    deploy:
      replicas: 2
      restart_policy:
        condition: on-failure

  pg:
    image: postgres:14-alpine
    environment:
      POSTGRES_PASSWORD: example
    labels:
      - docker-volume-backup.stop-during-backup=true
    volumes:
      - pg_data:/var/lib/postgresql/data
    deploy:
      restart_policy:
        condition: on-failure

volumes:
  backup_data:
    name: backup_data
  pg_data:
    name: pg_data

Expected behavior
The backup shouldn't crash.

Version (please complete the following information):

  • Image Version: v2.39.0
  • Docker Version: Docker version 26.0.1, build d260a54

Additional context
I have 3 VMs running in a basic swarm network, they all run Ubuntu Server 22.04:

lyze@docker-management:~/stacks$ sudo docker node ls
ID                            HOSTNAME            STATUS    AVAILABILITY   MANAGER STATUS   ENGINE VERSION
603pnevt1ayxlgxoruya6uia0     docker-default      Ready     Active                          26.0.1
qfgvk3lkntbcstbbs7b6chb1y *   docker-management   Ready     Active         Leader           26.0.1
tw74duuij6dsun881i2n3lodl     docker-vpn          Ready     Active                          26.0.1
@m90
Member

m90 commented Apr 15, 2024

Slightly adjusted to make sure that the backup container runs on the manager node

So the only change you made to the test file is

      placement:
        constraints:
          - node.role == manager

or did something else change as well?

sudo docker exec fa29d2fa55f0 backup

I wonder if the problem is related to the use of sudo here? What's your Docker setup? Do you e.g. use rootless Docker or similar? Did you add (or not add) the current user to the docker group (https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user)?

@lyze237
Author

lyze237 commented Apr 15, 2024

Thanks for the quick answer.

I downloaded the swarm example file with wget, then ran sudo docker stack deploy -c docker-compose.yml backup-test.

Without changing anything, the backup container won't start because of "No such image: offen/docker-v…", so I changed the tag to latest and then it started fine.

This spun up the backup container on one of my worker nodes.

Then I SSH'd into that worker node and ran the backup command, which gave me this error:

$ ssh 192.168.122.32
$ sudo docker ps
CONTAINER ID   IMAGE                                      COMMAND                  CREATED              STATUS              PORTS             NAMES
69298814eff8   postgres:14-alpine                         "docker-entrypoint.s…"   About a minute ago   Up About a minute   5432/tcp          backup-test_pg.1.tm1cry5ou5bkzci6k5vqe6spy
e96072e0835f   offen/offen:latest                         "/sbin/tini -- offen"    About a minute ago   Up About a minute   80/tcp, 443/tcp   backup-test_offen.1.9gddtlxim36vdo7nq9mw6iq8n
d4a36f744622   offen/docker-volume-backup:latest          "/usr/bin/backup -fo…"   About a minute ago   Up About a minute                     backup-test_backup.1.ia45i9llrah90sb0l4azfdldw

$ sudo docker exec d4a36f744622 backup
time=2024-04-15T10:34:25.469Z level=ERROR msg="Fatal error running command: This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager." error="main.(*command).runAsCommand: error running script: main.runScript.func4: error running script: main.(*script).stopContainersAndServices: error querying for services: Error response from daemon: This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."

Hence I also added the placement constraint on the backup container:

      placement:
        constraints:
          - node.role == manager

Then I got the error from above.

I wonder if the problem is related to the use of sudo here?

The current user I run commands from is not in the docker group so I use sudo to execute commands as root when I want to manage my docker containers.

I personally wouldn't want to add the user to the docker group as that would give that user pretty much root access.

However, for testing I did that and got the same error:

$ sudo usermod -aG docker lyze
$ sudo reboot # reboot disconnects ssh 
$ ssh 192.168.4.150 # reconnect
$ docker stack deploy -c docker-compose.yml backup-test
$ docker ps
CONTAINER ID   IMAGE                               COMMAND                  CREATED          STATUS                 PORTS                                                                                                                 NAMES
4a6bcdae8f24   offen/docker-volume-backup:latest   "/usr/bin/backup -fo…"   11 seconds ago   Up 11 seconds                                                                                                                                backup-test_backup.1.xmrwrdxgjxg2btso1wp0d9sg8
...

$ docker exec 4a6bcdae8f24 backup
time=2024-04-15T10:39:51.634Z level=ERROR msg="Fatal error running command: runtime error: invalid memory address or nil pointer dereference" error="main.(*command).runAsCommand: error running script: main.runScript.func1: unexpected panic running script: runtime error: invalid memory address or nil pointer dereference"

@m90
Member

m90 commented Apr 15, 2024

Hmm, this is very hard for me to debug as I can't reproduce it in any way. To be sure it's not caused by your Docker version, I also created #404, which updates the version used in tests to 26, but everything passes as expected.

Right now, it's very hard for me to tell why you are seeing this (highly unexpected) error. My hunch is that the Docker CLI client is somehow nil in your setup, but then again this code should handle any error on creation gracefully instead of using a nil client:

if !os.IsNotExist(err) || dockerHostSet {
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		return errwrap.Wrap(err, "failed to create docker client")
	}
	s.cli = cli
	s.registerHook(hookLevelPlumbing, func(err error) error {
		if err := s.cli.Close(); err != nil {
			return errwrap.Wrap(err, "failed to close docker client")
		}
		return nil
	})
}

@m90 m90 added the needs-info label Apr 15, 2024
@m90
Member

m90 commented Apr 15, 2024

Could you provide the output of docker info for your manager node? Maybe there is a hint to be found in there.

@lyze237
Author

lyze237 commented Apr 15, 2024

Client: Docker Engine - Community
 Version:    26.0.1
 Context:    default
 Debug Mode: false
 Plugins:
  buildx: Docker Buildx (Docker Inc.)
    Version:  v0.13.1
    Path:     /usr/libexec/docker/cli-plugins/docker-buildx
  compose: Docker Compose (Docker Inc.)
    Version:  v2.26.1
    Path:     /usr/libexec/docker/cli-plugins/docker-compose

Server:
 Containers: 16
  Running: 6
  Paused: 0
  Stopped: 10
 Images: 12
 Server Version: 26.0.1
 Storage Driver: overlay2
  Backing Filesystem: extfs
  Supports d_type: true
  Using metacopy: false
  Native Overlay Diff: true
  userxattr: false
 Logging Driver: json-file
 Cgroup Driver: systemd
 Cgroup Version: 2
 Plugins:
  Volume: local
  Network: bridge host ipvlan macvlan null overlay
  Log: awslogs fluentd gcplogs gelf journald json-file local splunk syslog
 Swarm: active
  NodeID: qfgvk3lkntbcstbbs7b6chb1y
  Is Manager: true
  ClusterID: v24n80o1pgeomfgeatykfut9e
  Managers: 1
  Nodes: 3
  Default Address Pool: 10.0.0.0/8  
  SubnetSize: 24
  Data Path Port: 4789
  Orchestration:
   Task History Retention Limit: 5
  Raft:
   Snapshot Interval: 10000
   Number of Old Snapshots to Retain: 0
   Heartbeat Tick: 1
   Election Tick: 10
  Dispatcher:
   Heartbeat Period: 5 seconds
  CA Configuration:
   Expiry Duration: 3 months
   Force Rotate: 0
  Autolock Managers: false
  Root Rotation In Progress: false
  Node Address: 192.168.122.186
  Manager Addresses:
   192.168.122.186:2377
 Runtimes: io.containerd.runc.v2 runc
 Default Runtime: runc
 Init Binary: docker-init
 containerd version: e377cd56a71523140ca6ae87e30244719194a521
 runc version: v1.1.12-0-g51d5e94
 init version: de40ad0
 Security Options:
  apparmor
  seccomp
   Profile: builtin
  cgroupns
 Kernel Version: 5.15.0-102-generic
 Operating System: Ubuntu 22.04.4 LTS
 OSType: linux
 Architecture: x86_64
 CPUs: 5
 Total Memory: 3.814GiB
 Name: docker-management
 ID: fe704fe5-6db8-444b-a164-b8f629f37ace
 Docker Root Dir: /var/lib/docker
 Debug Mode: false
 Experimental: false
 Insecure Registries:
  127.0.0.0/8
 Live Restore Enabled: false

@lyze237
Author

lyze237 commented Apr 15, 2024

I've added a fmt.Println(s.cli) at line 117 and got this output:
&{http unix:///var/run/docker.sock unix /var/run/docker.sock 0xc000334ed0 1.43 map[] false true false}
So it seems the cli client is not nil.

It also reaches the last return nil.

If you want me to run a patch or add some debug lines, just tell me please!

@m90
Member

m90 commented Apr 15, 2024

What you could do is extend this block:

defer func() {
	if derr := recover(); derr != nil {
		asErr, ok := derr.(error)
		if ok {
			err = errwrap.Wrap(asErr, "unexpected panic running script")
		} else {
			err = errwrap.Wrap(nil, fmt.Sprintf("%v", derr))
		}
	}
}()

to read

	defer func() {
		if derr := recover(); derr != nil {
			asErr, ok := derr.(error)
			if ok {
				fmt.Printf("%s: %s\n", asErr, debug.Stack())
				err = errwrap.Wrap(asErr, "unexpected panic running script")
			} else {
				err = errwrap.Wrap(nil, fmt.Sprintf("%v", derr))
			}
		}
	}()

which might tell us where the panic is being caused.

@lyze237
Author

lyze237 commented Apr 15, 2024

$ docker exec 810f1bf1bc39 backup
runtime error: invalid memory address or nil pointer dereference: goroutine 1 [running]:
runtime/debug.Stack()
        /usr/local/go/src/runtime/debug/stack.go:24 +0x5e
main.runScript.func1()
        /app/cmd/backup/run_script.go:22 +0x74
panic({0xcb9480?, 0x148f640?})
        /usr/local/go/src/runtime/panic.go:770 +0x132
main.(*script).stopContainersAndServices(0xc000381080)
        /app/cmd/backup/stop_restart.go:157 +0x79e
main.runScript.func4.1.1()
        /app/cmd/backup/run_script.go:61 +0x32
main.runScript.func4.1.(*script).withLabeledCommands.2()
        /app/cmd/backup/exec.go:211 +0x1b3
main.runScript.func4.1(0xc000381080)
        /app/cmd/backup/run_script.go:75 +0xbf
main.runScript.func4(0xc000381080)
        /app/cmd/backup/run_script.go:89 +0x1c
main.runScript(0xde4b18?)
        /app/cmd/backup/run_script.go:111 +0x1a5
main.(*command).runAsCommand(0xc00014a000?)
        /app/cmd/backup/command.go:39 +0x7d
main.main()
        /app/cmd/backup/main.go:22 +0x27c

time=2024-04-15T11:49:29.324Z level=ERROR msg="Fatal error running command: runtime error: invalid memory address or nil pointer dereference" error="main.(*command).runAsCommand: error running script: main.runScript.func1: unexpected panic running script: runtime error: invalid memory address or nil pointer dereference"

@m90
Member

m90 commented Apr 15, 2024

Thanks, that's very helpful as I can now see where the panic is being caused:

initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,

Is a service you are trying to stop / restart not deployed in replicated mode?
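
For context, the Docker SDK models a service's mode as a struct where exactly one of Replicated or Global is set, so for a globally deployed service Spec.Mode.Replicated stays nil and the dereference above panics. A minimal standalone sketch of that failure mode (my illustration, not code from this repo):

package main

import (
	"fmt"

	"github.com/docker/docker/api/types/swarm"
)

func main() {
	// A globally deployed service leaves Mode.Replicated as nil.
	global := swarm.Service{
		Spec: swarm.ServiceSpec{
			Mode: swarm.ServiceMode{Global: &swarm.GlobalService{}},
		},
	}
	// Same dereference as in stop_restart.go: this panics with
	// "invalid memory address or nil pointer dereference".
	fmt.Println(*global.Spec.Mode.Replicated.Replicas)
}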

@m90
Member

m90 commented Apr 15, 2024

Which would still not explain why the test case fails for you as well.

@m90
Member

m90 commented Apr 15, 2024

There is indeed a bug in there: the error returned from ServiceList is currently being checked after looping over the list of services, so I would think that in your particular case an error is being returned, but the code fails to handle it. Could you maybe flip the code in stop_restart.go to read

	if isDockerSwarm {
		allServices, err = s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
		if err != nil {
			return noop, errwrap.Wrap(err, "error querying for services")
		}
		matchingServices, err := s.cli.ServiceList(context.Background(), types.ServiceListOptions{
			Filters: filters.NewArgs(filters.KeyValuePair{
				Key:   "label",
				Value: filterMatchLabel,
			}),
			Status: true,
		})
		if err != nil {
			return noop, errwrap.Wrap(err, "error querying for services to scale down")
		}
		for _, s := range matchingServices {
			servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
				serviceID:           s.ID,
				initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
			})
		}
	}

and see what kind of error you receive?

@lyze237
Author

lyze237 commented Apr 15, 2024

Good catch!

I've stopped all stacks except the example one and ran the program; now it works fine.

However, having a service deployed anywhere (even outside of the current stack) with mode: global and the stop-during-backup label set still results in this error:

$ docker exec b28acd03c0d7 backup
runtime error: invalid memory address or nil pointer dereference: goroutine 1 [running]:
runtime/debug.Stack()
        /usr/local/go/src/runtime/debug/stack.go:24 +0x5e
main.runScript.func1()
        /app/cmd/backup/run_script.go:22 +0x74
panic({0xcb9480?, 0x148f640?})
        /usr/local/go/src/runtime/panic.go:770 +0x132
main.(*script).stopContainersAndServices(0xc000379080)
        /app/cmd/backup/stop_restart.go:160 +0x7be
main.runScript.func4.1.1()
        /app/cmd/backup/run_script.go:61 +0x32
main.runScript.func4.1.(*script).withLabeledCommands.2()
        /app/cmd/backup/exec.go:211 +0x1b3
main.runScript.func4.1(0xc000379080)
        /app/cmd/backup/run_script.go:75 +0xbf
main.runScript.func4(0xc000379080)
        /app/cmd/backup/run_script.go:89 +0x1c
main.runScript(0xde4b18?)
        /app/cmd/backup/run_script.go:111 +0x1a5
main.(*command).runAsCommand(0xc000140000?)
        /app/cmd/backup/command.go:39 +0x7d
main.main()
        /app/cmd/backup/main.go:22 +0x27c

time=2024-04-15T12:11:54.718Z level=ERROR msg="Fatal error running command: runtime error: invalid memory address or nil pointer dereference" error="main.(*command).runAsCommand: error running script: main.runScript.func1: unexpected panic running script: runtime error: invalid memory address or nil pointer dereference"

So for example the following setup fails:
$ cat backup-test.yml

# Copyright 2020-2021 - offen.software <[email protected]>
# SPDX-License-Identifier: Unlicense

version: '3.8'

services:
  minio:
    image: minio/minio:RELEASE.2020-08-04T23-10-51Z
    deploy:
      restart_policy:
        condition: on-failure
    environment:
      MINIO_ROOT_USER: test
      MINIO_ROOT_PASSWORD: test
      MINIO_ACCESS_KEY: test
      MINIO_SECRET_KEY: GMusLtUmILge2by+z890kQ
    entrypoint: /bin/ash -c 'mkdir -p /data/backup && minio server /data'
    volumes:
      - backup_data:/data

  backup:
    image: backup-test:latest
    depends_on:
      - minio
    deploy:
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.role == manager
    environment:
      AWS_ACCESS_KEY_ID: test
      AWS_SECRET_ACCESS_KEY: GMusLtUmILge2by+z890kQ
      AWS_ENDPOINT: minio:9000
      AWS_ENDPOINT_PROTO: http
      AWS_S3_BUCKET_NAME: backup
      BACKUP_FILENAME: test.tar.gz
      BACKUP_CRON_EXPRESSION: 0 0 5 31 2 ?
      BACKUP_RETENTION_DAYS: 7
      BACKUP_PRUNING_LEEWAY: 5s
    volumes:
      - pg_data:/backup/pg_data:ro
      - /var/run/docker.sock:/var/run/docker.sock

  offen:
    image: offen/offen:latest
    labels:
      - docker-volume-backup.stop-during-backup=true
    healthcheck:
      disable: true
    deploy:
      replicas: 2
      restart_policy:
        condition: on-failure

  pg:
    image: postgres:14-alpine
    environment:
      POSTGRES_PASSWORD: example
    labels:
      - docker-volume-backup.stop-during-backup=true
    volumes:
      - pg_data:/var/lib/postgresql/data
    deploy:
      restart_policy:
        condition: on-failure

volumes:
  backup_data:
    name: backup_data
  pg_data:
    name: pg_data

$ cat influx_test.yml

version: '3.3'

services:
  influxdb:
    image: influxdb:1.8
    volumes:
      - influx-data:/var/lib/influxdb
    networks:
      - net
    deploy:
      labels:
        - docker-volume-backup.stop-during-backup=true
      resources:
        limits:
          cpus: '0.60'
          memory: 512M
        reservations:
          cpus: '0.30'
          memory: 128M
      mode: global
      placement:
        constraints:
          - node.role == manager

networks:
  net:
    driver: overlay

volumes:
  influx-data:
    driver: local
$ docker stack deploy -c influx_test.yml influx
$ docker stack deploy -c backup-test.yml backup-test
$ docker ps
53cbc4172d27   offen/offen:latest   "/sbin/tini -- offen"    About a minute ago   Up About a minute   80/tcp, 443/tcp                                                                                                       backup-test_offen.1.ptt21iuhlxj5c7pky3pxfkfzk
ec39062b6c90   influxdb:1.8         "/entrypoint.sh infl…"   About a minute ago   Up About a minute   8086/tcp                                                                                                              influx_influxdb.1.6yk4u6qugi2qd2aj1v45vz9cw
b28acd03c0d7   backup-test:latest   "/usr/bin/backup -fo…"   2 minutes ago        Up 2 minutes                                                                                                                              backup-test_backup.1.v0k7plmt2frvfijvfrs58g9
$ docker exec b28acd03c0d7 backup
<throws error>

Removing the label in the influx_test.yml file makes it work fine afterwards.

@m90
Member

m90 commented Apr 15, 2024

However, having a service deployed anywhere (even outside of the current stack) with mode: global and the stop-during-backup label set still results in this error.

Is that using the patch suggested in #403 (comment) or without?


Labeling globally deployed services is not supported, and to be honest I don't know how it could be supported, as they can't be scaled down as easily. I'm not sure what the container-level approach does in that case, though: https://offen.github.io/docker-volume-backup/how-tos/use-with-docker-swarm.html

@lyze237
Author

lyze237 commented Apr 15, 2024

Yeah that's with the patch included:

$ git diff
diff --git a/cmd/backup/run_script.go b/cmd/backup/run_script.go
index b9ada32..d636f3e 100644
--- a/cmd/backup/run_script.go
+++ b/cmd/backup/run_script.go
@@ -6,7 +6,7 @@ package main
 import (
        "errors"
        "fmt"
-
+       "runtime/debug"
        "github.com/offen/docker-volume-backup/internal/errwrap"
 )

@@ -19,6 +19,7 @@ func runScript(c *Config) (err error) {
                if derr := recover(); derr != nil {
                        asErr, ok := derr.(error)
                        if ok {
+                               fmt.Printf("%s: %s\n", asErr, debug.Stack())
                                err = errwrap.Wrap(asErr, "unexpected panic running script")
                        } else {
                                err = errwrap.Wrap(nil, fmt.Sprintf("%v", derr))
diff --git a/cmd/backup/stop_restart.go b/cmd/backup/stop_restart.go
index 7fc558c..1c0e761 100644
--- a/cmd/backup/stop_restart.go
+++ b/cmd/backup/stop_restart.go
@@ -151,15 +151,15 @@ func (s *script) stopContainersAndServices() (func() error, error) {
                        }),
                        Status: true,
                })
+               if err != nil {
+                       return noop, errwrap.Wrap(err, "error querying for services to scale down")
+               }
                for _, s := range matchingServices {
                        servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
                                serviceID:           s.ID,
                                initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
                        })
                }
-               if err != nil {
-                       return noop, errwrap.Wrap(err, "error querying for services to scale down")
-               }
        }

        if len(containersToStop) == 0 && len(servicesToScaleDown) == 0 {
@@ -359,3 +359,4 @@ func (s *script) stopContainersAndServices() (func() error, error) {
                return nil
        }, initialErr
 }
+

Yeah, I'm fine with it not working in global mode; I just wanted to mention it in case the patch should throw a proper error instead of a stack trace.

Thanks for your help as well btw! :)

@m90
Member

m90 commented Apr 15, 2024

Yeah, I'm fine with it not working in global mode; I just wanted to mention it in case the patch should throw a proper error instead of a stack trace.

I think I know what's going on (there's no error, but also no replica info); I'll include it in my PR, thanks for the input.
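
For the record, a guard along these lines in stop_restart.go would turn the panic into a readable error (a sketch only, reusing the surrounding noop and errwrap helpers; not necessarily what the PR will do):

for _, s := range matchingServices {
	if s.Spec.Mode.Replicated == nil || s.Spec.Mode.Replicated.Replicas == nil {
		// Globally deployed services have no replica count to restore,
		// so fail with a clear message instead of dereferencing nil.
		return noop, errwrap.Wrap(nil, fmt.Sprintf(
			"service %s is not deployed in replicated mode and cannot be scaled down",
			s.Spec.Name,
		))
	}
	servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
		serviceID:           s.ID,
		initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
	})
}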

@m90
Member

m90 commented Apr 15, 2024

This is fixed in v2.39.1
