-
Notifications
You must be signed in to change notification settings - Fork 57
/
zrep_sync
906 lines (741 loc) · 24.6 KB
/
zrep_sync
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
##### File: zrep_sync
# contains meat of the "sync" level operations, which deal with
# data transfer.
# basic snap routines, and init routines, are in zrep_snap
## file-internal routine that gets used a lot in zrep_sync. but not always
# Print the "sent" property assignment for the current moment.
# Globals:  ZREPTAG (read) - property namespace prefix
# Outputs:  one line of the form "<ZREPTAG>:sent=<value printed by
#           zrep_gettimeinseconds>", usable as the prop=value arg of "zfs set"
_gensentprop(){
	echo "${ZREPTAG}:sent=$(zrep_gettimeinseconds)"
}
# This is a RECOVERY ROUTINE ONLY.
# I put lots of sanity checking in here, that doesnt make sense to keep
# with a more general case internal routine.
# This exists, because certain people say that for some odd reason on their systems,
# the zfs send completes, but zrep gets killed before zrep updates properties.
# To help people save the time on resyncing hundreds of TB,
# give them a way to update the sent property.
#
# This only works with newstyle ZFS that allows property setting on snapshots
# Needs to follow whatever is done in _sync(), after the zfs send
# (and therefore getlastsnapsent() as well)
#
# Usage: zrep_sentsync [-L] fs@snap
#
# WARNING: If you have nested zrep filesystem below this one.. you just hosed yourself.
#
# Arguments: [-L] fs@snap
#   -L       only mark the local snapshot; do not touch the destination host
#   fs@snap  existing snapshot whose name matches <ZREPTAG>_<hex...>
# Globals:   ZREPTAG, Z_HAS_SNAPPROPS, ZFSGETLVAL, ZFSGETVAL (all read)
# Effect:    stamps <ZREPTAG>:sent on the snapshot (and on the same-named
#            snapshot on the destination unless -L), then marks fs as master.
zrep_sentsync(){
	typeset remote_too=1
	if [[ "$1" == "-L" ]] ; then
		remote_too=0
		shift
	fi

	typeset srcsnap="$1"
	typeset srcfs="${srcsnap%@*}" snapname="${srcsnap#*@}"

	(( Z_HAS_SNAPPROPS )) ||
		zrep_errquit This sentsync operation only supported with modern ZFS implementations

	# The snapshot has to exist already; we never create it here.
	zfs list -t snapshot "$srcsnap" >/dev/null 2>&1 ||
		zrep_errquit Expected snapshot for $1. Cannot continue

	# Only accept names that look like zrep made them.
	[[ "$srcsnap" == *@${ZREPTAG}_[0-9a-f]* ]] ||
		zrep_errquit $1 does not follow zrep naming standards. Cannot continue

	# Refuse if the filesystem itself already carries a locally-set sent prop.
	typeset already
	already=$($ZFSGETLVAL ${ZREPTAG}:sent $srcfs)
	[[ -z "$already" ]] ||
		zrep_errquit ${ZREPTAG}:sent already present on $srcfs

	_clearlast $srcfs

	typeset sentprop="$(_gensentprop)"

	if (( remote_too )) ; then
		typeset desthost destfs
		desthost=$($ZFSGETVAL ${ZREPTAG}:dest-host $srcfs)
		destfs=$($ZFSGETVAL ${ZREPTAG}:dest-fs $srcfs)
		zrep_ssh $desthost zfs set $sentprop $destfs@$snapname
	fi

	zfs set $sentprop ${srcsnap}

	# Redundant for recovery, but crucial when the user is converting an
	# existing snapshot into a zrep snapshot. Must match zrep_init.
	zfs set ${ZREPTAG}:master=yes ${srcfs}
}
####################
# synctosnap: called by zrep_sync, if a specific snapshot is specified.
#
# This LOCAL side, *and* REMOTE side, match up with local zrep_created
# snapshot. ...
#
# Note that it uses zrep_lock_fs
#
# WARNING: if we force other side to roll to snap....
# we should NOT BE SYNCING ANY more.
# At the moment, it is up to the user to ensure that nothing is going on
# locally, and future zrep syncs wont just effectively roll forward again
# on the remote side.
# zrep sync jobs should probably be halted, until it is decided that
# you want to sync again.
#
# In the future, I should support some kind of "pause" option, for
# zrep sync all to ignore a rolled back filesystem
#
#
# Arguments: $1 - local snapshot (fs@snapname) to roll the remote side back to
#            $2 - destination filesystem
#            $3 - destination host
# Effect:    after a 20 second grace period, rolls the remote filesystem back
#            to the same-named snapshot, then removes the <ZREPTAG>:sent
#            property from every local snapshot listed after it.
synctosnap(){
	typeset srcsnap=$1 destfs=$2 desthost=$3
	typeset newsentlist
	typeset srcfs snapname destsnap

	if [[ -z "$desthost" ]] ; then
		echo ERROR: synctosnap did not receive all required args
		zrep_errquit "args=$@"
	fi

	srcfs=${srcsnap%@*}
	snapname=${srcsnap#*@}
	destsnap=$snapname

	# Have to enforce OUR naming syntax, otherwise any future attempt to
	# continue syncing will fail ( getlastsnap() wont find it! )
	[[ $snapname == ${ZREPTAG}_[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]* ]] ||
		zrep_errquit $srcsnap is not zrep snapshot. Cannot roll with it.

	echo Validating remote snap
	zrep_ssh $desthost zfs list -t snapshot $destfs@$destsnap >/dev/null ||
		zrep_errquit $destfs@$destsnap does not exist. Cannot roll to snap

	echo "WARNING: We will be rolling back $destfs, on $desthost"
	echo -n " to $snapname, made at: "
	$ZFSGETVAL creation $srcsnap
	echo ""
	echo "All newer snapshots on remote side will be destroyed"
	echo "You should have paused ongoing sync jobs for $destfs before continuing"

	# Two-stage countdown gives the operator a chance to interrupt.
	echo "Continuing in 20 seconds...."
	sleep 10
	echo "Continuing in 10 seconds...."
	sleep 10

	zrep_lock_fs $srcfs || zrep_errquit "Cannot lock $srcfs"

	zrep_ssh $desthost zfs rollback -Rr $destfs@$destsnap || zrep_errquit roll failed
	echo $desthost:$destfs rolled back successfully to $destsnap

	echo Now cleaning up local snapshots
	# Undo whatever zrep_sync did: everything listed after the target
	# snapshot must no longer be flagged as sent.
	newsentlist=$(getallsnaps $srcfs | sed "1,/@$snapname/d")
	for snap in $newsentlist ; do
		zfs inherit ${ZREPTAG}:sent $snap
	done

	zrep_unlock_fs $srcfs
}
#
# called by _sync
# Check if there have been changes since specified snap
# If no, then return 0 == true [no changes ]
#
# Arguments: $1 - snapshot to compare against ("zfs diff -H $1")
# Globals:   ZREP_RUNDIR (read) - directory for the scratch file
# Returns:   0 when zfs diff succeeded and printed nothing;
#            1 when it printed anything, or when zfs diff itself failed.
_sync_nochanges(){
	typeset difflist="$ZREP_RUNDIR/zrep.$$.c"
	typeset verdict=1

	rm -f $difflist
	if zfs diff -H $1 >$difflist ; then
		if ! test -s $difflist ; then
			_debugprint sync_nochanges did not find any changes
			verdict=0
		fi
	else
		_errprint _sync_nochanges: zfs diff command unrecognized
	fi

	# Scratch file is removed on every path.
	rm -f $difflist
	return $verdict
}
# Usage: _snapandsync fs desthost destfs
# internal routine called by zrep_sync and zrep_failover,
# to do an incremental send.
# You must hold filesystem lock before calling this
# WE DO NOT DO ANY SAFETY OR LOCK CHECKS HERE.
# Caller is expected to have done them.
#
# Will create a new snap on srcfs, and sync it over to given destination
# Sets our 'synced' marker on it as well.
#
# Arguments: $1 - source filesystem, $2 - destination host, $3 - destination fs
# Globals:   ZREP_CHANGEDONLY, Z_HAS_SNAPPROPS, ZREPTAG (read)
# Returns:   0 after a timestamp-only refresh; otherwise the status of _sync
_snapandsync(){
	typeset srcfs=$1 desthost=$2 destfs=$3
	typeset sentsnap newsnap

	# Find the incremental starting point BEFORE creating a new snapshot:
	# there is no point making one if an incremental send is impossible.
	sentsnap=$(getlastsnapsent $srcfs)
	if [[ -z "$sentsnap" ]] ; then
		echo zrep_sync could not find sent snap for $srcfs.
		zrep_errquit You must initialize $srcfs for zrep
	fi

	# "Changed only" mode: when nothing differs from the last sent snapshot,
	# just refresh the sent timestamp and skip the snapshot/send entirely.
	if [[ -n "$ZREP_CHANGEDONLY" ]] && _sync_nochanges $sentsnap ; then
		_debugprint No changes found in $srcfs. Updating timestamp only
		if (( Z_HAS_SNAPPROPS )) ; then
			zfs set $(_gensentprop) ${sentsnap}
		else
			# Old-ZFS compatibility only; not the preferred style.
			zfs set ${ZREPTAG}:lastsenttime=$(zrep_gettimeinseconds) $srcfs
		fi
		return 0
	fi

	newsnap=$(makesnap $srcfs)
	[[ -n "$newsnap" ]] ||
		zrep_errquit zrep_sync could not create new snapshot for $srcfs

	_sync $srcfs $desthost $destfs $sentsnap $newsnap
}
# called by _snapandsync, and also zrep_synconly
# Usage: _sync sourcefs destinationhost destinationfs (lastsent (newsnap))
# This is the level that calls zfs directly.
# see also _refreshpull, since it also calls directly.
# Arguments: $1 - source filesystem
#            $2 - destination host
#            $3 - destination filesystem
#            $4 - (optional) last sent snapshot; looked up when empty
#            $5 - (optional) snapshot to send; newest snapshot when empty
# Globals:   ZREP_FORCE, ZREP_VERBOSE, ZREP_RESUME, ZREP_RESUME_FLAGS, ZREP_R,
#            ZREP_SEND_FLAGS, ZREP_INC_FLAG, Z_F_OUT, Z_F_IN, BBCP,
#            ZREP_RENAME_UNSENT, Z_SNAP_R, Z_HAS_SNAPPROPS, ZREPTAG (read);
#            SENDCMD (written, not local)
# Returns:   0 when the snapshot was already sent; otherwise the status of
#            the final property update. Fatal problems go through zrep_errquit.
_sync(){
	typeset force verbose
	typeset token="" recv_s=""

	if [[ "$ZREP_FORCE" == "-f" ]] ; then
		force=-F
	fi
	if [[ "$ZREP_VERBOSE" != "" ]] ; then
		verbose=-v
	fi
	if [[ "$ZREP_RESUME" != "" ]] ; then
		recv_s=-s
	fi

	typeset srcfs=$1 desthost=$2 destfs=$3
	typeset lastsent=$4 newsnap=$5
	typeset snapname

	if [[ "$lastsent" == "" ]] ; then
		lastsent=`getlastsnapsent $srcfs`
		if [[ "$lastsent" == "" ]] ; then
			echo zrep_sync could not find sent snap for $srcfs.
			zrep_errquit You must initialize $srcfs for zrep
		fi
	fi

	if [[ "$newsnap" == "" ]] ; then
		newsnap=`getlastsnap $srcfs`
		if [[ "$newsnap" == "" ]] ; then
			# This is the lookup of the NEWEST snapshot, not the sent one;
			# say so, so the operator is not sent chasing the wrong thing.
			echo zrep_sync could not find latest snap for $srcfs.
			zrep_errquit You must initialize $srcfs for zrep
		fi
	fi

	if [[ "$newsnap" == "$lastsent" ]] ; then
		echo $newsnap already sent
		return 0
	fi

	# Refuse to send to a destination that also believes it is master.
	typeset remotemaster
	remotemaster=`zrep_ssh $desthost $ZFSGETLVAL ${ZREPTAG}:master $destfs`
	if [[ $? -ne 0 ]] ; then
		zrep_errquit "$desthost is not reachable via ssh? Cannot sync"
	fi
	if [[ "$remotemaster" == "yes" ]] ; then
		zrep_errquit "Other side ($desthost:$destfs) is also master. Split brain detected"
	fi

	snapname=${newsnap#*@}

	# Do this manually, not using _gensentprop, because we want one
	# consistent timestamp for every property set below.
	typeset timeinsec=`zrep_gettimeinseconds`
	typeset senttimeprop="${ZREPTAG}:sent=$timeinsec"

	echo sending $newsnap to $desthost:$destfs

	if [[ "$ZREP_RESUME" != "" ]] ; then
		token=`zrep_ssh $desthost $ZFSGETVAL receive_resume_token $destfs`
		# "-" is what an unset property reads as; treat it as "no token"
		if [[ "$token" == "-" ]] ; then token="" ; fi
	fi

	# Note: doing "-o $senttimeprop" sets prop on FILESYSTEM, not snap.
	# So we dont do that usually

	# other than zrep_init, this should be the ONLY place we do a send
	#   Sigh. but now we also do in _refreshpull
	if [[ "$BBCP" != "" ]] ; then
		if [[ "$token" != "" ]] ; then
			SENDCMD="zfs send -t $token ${ZREP_RESUME_FLAGS}"
		else
			SENDCMD="zfs send $verbose ${ZREP_R} ${ZREP_SEND_FLAGS} ${ZREP_INC_FLAG} $lastsent $newsnap"
		fi
		$BBCP -N io "$SENDCMD" \
		   "$desthost:zfs recv $recv_s $force $destfs"
	else
		if [[ "$token" != "" ]] ; then
			eval zfs send ${ZREP_RESUME_FLAGS} -t $token ${Z_F_OUT} |
			   zrep_ssh $desthost "${Z_F_IN} zfs recv $recv_s $force $destfs"
		else
			eval zfs send $verbose ${ZREP_R} ${ZREP_SEND_FLAGS} ${ZREP_INC_FLAG} $lastsent $newsnap ${Z_F_OUT} |
			   zrep_ssh $desthost "${Z_F_IN} zfs recv $recv_s $force $destfs"
		fi
	fi

	# NOTE: $? here is the status of the if/else above, i.e. of whichever
	# transfer command ran last. For the pipelines that is the receiving
	# side (unless pipefail is enabled somewhere outside this function).
	# Nothing may be inserted between the transfer and this test.
	#
	# I rename this to _unsent rather than just delete, in case people are using zrep
	# for the DUAL use, of replication,
	# plus convenient user-based "oops" recovery from the automatic .zfs/snapshots directory
	# But if resume support enabled, should auto-retry next time sync called
	if [[ $? -ne 0 ]] ; then
		if [[ "$ZREP_RESUME" == "" ]] ; then
			if [[ "$ZREP_RENAME_UNSENT" == "yes" ]] ; then
				zfs rename ${Z_SNAP_R} ${newsnap} ${newsnap}_unsent
				zrep_errquit Problem doing sync for $newsnap. Renamed to ${newsnap}_unsent
			else
				zfs destroy ${Z_SNAP_R} ${newsnap}
			fi
		fi
		zrep_errquit Problem doing sync for $newsnap.
	fi

	##################################################
	##### Okay. data sync completed. Now register that fact with ZFS properties.
	##### If you modify below here, you also need to update zrep_sentsync

	#Even if we are "old mode", other side may not be.
	# So try newer way first.
	zrep_ssh $desthost zfs set $senttimeprop $destfs@$snapname
	if [[ $? -ne 0 ]] ; then
		echo WARNING: setting ${ZREPTAG}:sent failed on $desthost:$destfs@$snapname
		echo Using fallback methods. You should go patch $destfs to have newer ZFS version
		zrep_ssh $desthost zfs set ${ZREPTAG}:lastsent=${newsnap} $destfs
		zrep_ssh $desthost zfs set ${ZREPTAG}:lastsenttime=${timeinsec} $destfs
	fi

	if (( Z_HAS_SNAPPROPS )) ; then
		zfs set $senttimeprop ${newsnap}
	else
		#note that this is only for old-ZFS compatibility.
		# We dont really want to use this style if possible!
		zfs set ${ZREPTAG}:lastsent=${newsnap} $srcfs
		zfs set ${ZREPTAG}:lastsenttime=${timeinsec} $srcfs
	fi
}
#User entrypoint, for synconly, which is the pair of snaponly
#Keep it paired with zrep_sync
# Arguments: "all", or one or more source filesystem names
# Globals:   ZREPTAG, ZFSGETLVAL, ZFSGETVAL (read)
# Effect:    for each filesystem: verify it is master, look up its
#            destination, take the zrep lock, send the newest existing
#            snapshot via _sync (no new snapshot is made), expire, unlock.
#            Exits silently when "all" resolves to no filesystems.
zrep_synconly(){
	# annoyingly..need to make this almost identical to our current full
	# zrep_sync. but just skipping first steps :(
	# we can skip retries, though.
	typeset srcfs desthost destfs check

	if [[ "$1" == "all" ]] ; then
		set -- `zrep_list_master`
		if [[ "$1" == "" ]] ; then
			exit
		fi
	fi

	[[ "$1" == "" ]] && zrep_errquit No fileystem specified for synconly

	while [[ "$1" != "" ]] ; do
		srcfs=$1

		check=`$ZFSGETLVAL ${ZREPTAG}:master $srcfs`
		if [[ "$check" != "yes" ]] ; then
			zrep_errquit $srcfs not master. Cannot sync
		fi

		# Validate BOTH lookups. A failed or empty dest-host must be caught
		# here: the values are passed unquoted below, so an empty one would
		# silently shift the remaining arguments of _sync.
		desthost=`$ZFSGETVAL ${ZREPTAG}:dest-host $srcfs` || desthost=""
		destfs=`$ZFSGETVAL ${ZREPTAG}:dest-fs $srcfs` || destfs=""
		if [[ "$desthost" == "" ]] || [[ "$desthost" == "-" ]] ||
		   [[ "$destfs" == "" ]] || [[ "$destfs" == "-" ]] ; then
			zrep_errquit Problem getting zrep properties for fs $srcfs
		fi

		zrep_lock_fs $srcfs
		if [[ $? -ne 0 ]] ; then
			zrep_errquit Failed to acquire zrep lock for $srcfs
		fi

		_sync $srcfs $desthost $destfs || zrep_errquit sync failed for $srcfs

		_expire $srcfs	#dont care so much if this fails

		zrep_unlock_fs $srcfs
		shift
	done
}
#zrep_sync
# User entrypoint
# Make a new snapshot and copy it over.
# Usage: zrep_sync [-q quiettime] (all|fs1 .. fsX)
# See workflow.txt
# SPECIAL CASE: Will call synctosnap if a snapshot is given instead of fsname
# Normally, will bail out if another instance of zrep holds lock.
# -q option says to check last update time of locked filesystems.
# If sync more recent than given quiettime, then quietly ignore
#
# Arguments: [-c] [-q quiettime] (all | fs1 .. fsX | fs@snap)
#   -c            only snapshot/send when the filesystem changed
#                 (exports ZREP_CHANGEDONLY=yes)
#   -q quiettime  if a filesystem is locked by someone else, skip it quietly
#                 as long as its last sync is no older than quiettime seconds
#   fs@snap       special case: hand over to synctosnap
# Globals:   ZREPTAG, ZFSGETLVAL, ZFSGETVAL (read); ZREP_CHANGEDONLY (exported)
# Effect:    for each filesystem: verify master, look up destination, lock,
#            _snapandsync, _expire, unlock.
zrep_sync(){
	# If you make changes in here, check if needed in zrep_synconly!!
	typeset srcfs destfs desthost sentsnap newsnap check
	typeset currtime snaptime elapsed
	typeset quiettime=0

	if [[ "$1" == "-c" ]] ; then
		export ZREP_CHANGEDONLY="yes"
		shift
	fi

	if [[ "$1" == "-q" ]] ; then
		quiettime="$2"
		shift
		shift
		if (( quiettime < 30 )) ; then
			zrep_errquit "-q must use value greater than 30"
		fi
	fi

	if [[ "$1" == "all" ]] ; then
		set -- `zrep_list_master`
		if [[ "$1" == "" ]] ; then
			# Stay quiet, so we dont spew if in cron
			#echo No zrep mastered filesystems found
			exit
		fi
	fi

	[[ "$1" == "" ]] && zrep_errquit No fileystem specified for sync

	# Special Case. User can force sync from specific snapshot
	case $1 in
		*@*)
			srcfs="$1"
			desthost=`$ZFSGETVAL ${ZREPTAG}:dest-host $srcfs`
			destfs=`$ZFSGETVAL ${ZREPTAG}:dest-fs $srcfs`
			synctosnap $srcfs $destfs $desthost
			return
			;;
	esac

	while [[ "$1" != "" ]] ; do
		srcfs="$1"

		check=`$ZFSGETLVAL ${ZREPTAG}:master $srcfs`
		if [[ "$check" != "yes" ]] ; then
			zrep_errquit $srcfs not master. Cannot sync
		fi

		# Validate BOTH lookups. A failed or empty dest-host must be caught
		# here: the values are passed unquoted below, so an empty one would
		# silently shift the remaining arguments of _snapandsync.
		desthost=`$ZFSGETVAL ${ZREPTAG}:dest-host $srcfs` || desthost=""
		destfs=`$ZFSGETVAL ${ZREPTAG}:dest-fs $srcfs` || destfs=""
		if [[ "$desthost" == "" ]] || [[ "$desthost" == "-" ]] ||
		   [[ "$destfs" == "" ]] || [[ "$destfs" == "-" ]] ; then
			zrep_errquit Problem getting zrep properties for fs $srcfs
		fi

		zrep_lock_fs $srcfs
		if [[ $? -ne 0 ]] ; then
			# Someone else holds the lock. Only tolerated in quiet mode.
			if ((quiettime==0)); then
				zrep_errquit Cannot lock $srcfs. Cannot continue
			fi
			currtime=`zrep_gettimeinseconds`
			snaptime=`getlastsynctime $srcfs`
			if (( snaptime == 0 )) ; then
				zrep_errquit quiet mode set, but no last snap for $srcfs
			fi

			elapsed=$((currtime - snaptime))
			if ((elapsed > quiettime)) ; then
				_debugprint $elapsed seconds have elapsed since last sync of $srcfs
				zrep_errquit quiet time limit of $quiettime seconds exceeded for busy fs $srcfs
			fi

			# Skip only THIS filesystem. Returning here would silently
			# drop every filesystem still waiting in the argument list
			# (e.g. the rest of "sync -q N all").
			echo Quiet mode: skipping busy fs $srcfs at `date`
			shift
			continue
		fi

		_snapandsync $srcfs $desthost $destfs

		# Make this message match what zrep_expire uses..
		echo Expiring zrep snaps on $srcfs
		_expire $srcfs

		zrep_unlock_fs $srcfs
		shift
	done
}
# Usage: zrep_refresh fsname
#
# zrep_refresh is a "pull" version of "zrep_sync"
# The concept is a bit of a hack.
# It primarily exists so people can run a secure backup server, that
# has ssh access to all hosts, but not vice versa
#
# Implementation is a bit sketchy.
# For initial, non-optimal run, perhaps take advantage of
# ssh host zrep synconly
# to avoid too much duplication of things?
# but will still need to set all the perms n things. Nastyyy..
# The MAIN nastiness, is that all our locks are on the "master" side.
# Which depends on the PID still being there!!
# But if we start now running things on the "slave" side..
# There is potential for problems
# Examine critical points and reasons for lock:
# 1. while doing analysis of which snap to send
# 2. to avoid parallel "zfs send"s running.
# 3. for update of timestamp
#
# We can still wrap #1 and #2 in a single lock call.
# (and still on the src side!)
# The ugly comes when updating zrep:sent. Dont want to update wrong snap!
# So long as we do some kind of check to see that we're not going
# backwards when we get lock a second time ... we should be relatively okay.
# However.. for simplicity... going to just cross fingers and wrap
# all three in single remote lock call, through _refreshpull
#
# Arguments: [-r] destfs
#   -r      enable resume support (exports ZREP_RESUME=1)
#   destfs  local, NON-master filesystem to refresh (only one is handled)
# Globals:   ZREPTAG, ZFSGETLVAL, ZFSGETVAL, ZREP_FORCE, ZREP_PATH, ZREP_R,
#            Z_F_OUT, Z_F_IN, BBCP (read); ZREP_RESUME (read, exported by -r)
# Returns:   1 when no filesystem was named; fatal problems go through
#            zrep_errquit.
# Effect:    asks the source host to snapshot, pulls that snapshot through
#            the remote "_refreshpull" command, stamps the sent property on
#            both sides, then runs expire locally and remotely.
zrep_refresh(){
	typeset srcfs destfs desthost newsnap newseq master
	# token must start out empty: it is interpolated into the remote
	# command line even when resume support is off.
	typeset srchost token=""
	typeset force

	if [[ "$ZREP_FORCE" == "-f" ]] ; then
		force=-F
	fi

	if [[ "$1" == "-r" ]] ; then
		export ZREP_RESUME=1
		shift
	fi

	# for now, just handle ONE fs, not multiple fs list
	destfs="$1"
	if [[ "$1" == "" ]] ; then
		_errprint Error: no filesystems specified for refresh
		return 1
	fi

	master=`$ZFSGETLVAL ${ZREPTAG}:master $destfs`
	if [[ "$master" == "yes" ]] ; then
		zrep_errquit Sorry, you cant run refresh on a master mode fs $destfs
	fi

	srchost=`$ZFSGETVAL ${ZREPTAG}:src-host $destfs`
	srcfs=`$ZFSGETVAL ${ZREPTAG}:src-fs $destfs`

	zrep_lock_fs $destfs
	if [[ $? -ne 0 ]] ; then
		zrep_errquit Cannot lock $destfs. Cannot continue
	fi

	# The snapshot is taken of the SOURCE filesystem, on the source host.
	_debugprint refresh step 1: Going to $srchost to snapshot $srcfs
	newsnap=`zrep_ssh $srchost $ZREP_PATH ${ZREP_R} snaponly $srcfs`
	if [[ $? -ne 0 ]] ; then
		zrep_errquit snap of src $srcfs on $srchost failed
	fi

	# yes, MORE paranoia..
	case $newsnap in
		*@${ZREPTAG}_*)
			newseq=${newsnap#*@}
			;;
		*)
			zrep_errquit Unrecognized output from src snap. Cannot continue
			;;
	esac

	typeset senttimeprop="`_gensentprop`"

	_debugprint refresh step 2: Pulling $newsnap
	if [[ "$ZREP_RESUME" != "" ]] ; then
		token=`$ZFSGETVAL receive_resume_token $destfs`
		# "-" is what an unset property reads as; treat it as "no token"
		if [[ "$token" == "-" ]] ; then token="" ; fi
	fi

	if [[ "$BBCP" != "" ]] ; then
		$BBCP -N io "$srchost:$ZREP_PATH _refreshpull $newsnap $token" \
		  "zfs recv $force $destfs"
	else
		zrep_ssh $srchost "$ZREP_PATH ${ZREP_R} _refreshpull $newsnap $token ${Z_F_OUT}" |
		  eval ${Z_F_IN} zfs recv $force $destfs
	fi
	# $? is the status of the transfer command just above; keep this test
	# directly after it.
	if [[ $? -ne 0 ]] ; then
		zrep_errquit Unforseen error pulling snapshot $newsnap from $srchost
	fi

	zfs set $senttimeprop $destfs@$newseq
	if [[ $? -ne 0 ]] ; then
		_errprint WARNING: expected local copy $destfs@$newseq does not exist
	fi

	# Only now tell the source side the snapshot counts as sent.
	zrep_ssh $srchost $ZREP_PATH _refreshcomplete $newsnap $senttimeprop

	_debugprint Running local expires on $destfs
	_expire $destfs

	_debugprint Running remote expires on $srcfs
	sleep 1 # avoid race condition on samehost situation
	zrep_ssh $srchost "$ZREP_PATH expire -L $srcfs"

	zrep_unlock_fs $destfs
}
# Implementation for hidden command-line option, "zrep _refreshpull"
# This is called remotely by zrep refresh
# ( aka zrep_refresh )
# We dont just call "ssh zfs send", because we want to use zrep locking
#
# This routine is definitely not supposed to be user visible
# .. eh... maybe someday. but initial design is "private"
#
# Note that this is only called on a per-filesystem name
#
# Syntax: _refreshpull fs/name [optional resume token]
#
# Arguments: $1 - snapshot (fs@snap) the puller expects to receive
#            $2 - (optional) resume token
# Globals:   ZREP_VERBOSE, DEBUG, Z_HAS_SNAPPROPS, ZREP_RESUME_FLAGS, ZREP_R,
#            ZREP_SEND_FLAGS, ZREP_INC_FLAG (read)
# Outputs:   the zfs send stream on stdout; diagnostics must go to stderr.
_refreshpull(){
	typeset fs snapname lastsent latest verbose
	typeset token="$2"

	[[ -n "$ZREP_VERBOSE" ]] && verbose=-v

	snapname="$1"
	fs=${snapname%@*}

	# stdin/stdout carry the stream, so the regular debugprint is unusable.
	if [[ -n "$DEBUG" ]] ; then
		_errprint _refreshpull: snapname=$snapname, fs=$fs
	fi

	zrep_lock_fs $fs || zrep_errquit Could not lock $fs

	# From here on:
	#  1. find the incremental base
	#  2. insist the requested snapshot is the newest one
	#  3. run zfs send
	# It is presumed that a failed receive shows up as the pipe blowing up.
	lastsent=$(getlastsnapsent $fs)
	[[ -n "$lastsent" ]] ||
		zrep_errquit Canthappen: _refreshpull cant findlastsent snap

	latest=$(getlastsnap $fs)
	[[ "$latest" == "$snapname" ]] ||
		zrep_errquit Sync error: $snapname is not latest snap for $fs

	(( Z_HAS_SNAPPROPS != 0 )) ||
		zrep_errquit Error: we currently only support modern ZFS that allows setting props on snaps

	if [[ -n "$token" ]] ; then
		zfs send ${ZREP_RESUME_FLAGS} -t $token
	else
		zfs send $verbose ${ZREP_R} ${ZREP_SEND_FLAGS} ${ZREP_INC_FLAG} $lastsent $latest
	fi || zrep_errquit Some kind of error during sending. Bailing out of _refreshpull

	zrep_unlock_fs $fs
}
# INTERNAL-ONLY HOOK for "zrep refresh"
#
# This used to be in _refreshpull. However, that led to a race condition where a
# "zfs send" had been completed, and "last sent" had been updated prematurely.
# if the zfs receive didnt complete, then things were messed up.
# Usage:
# _refreshcomplete fs@snapshot propertysetting=value
#
# Arguments: $1 - fs@snapshot, $2 - property=value
# Returns:   the exit status of "zfs set"
_refreshcomplete(){
	typeset target=$1 propsetting=$2
	zfs set $propsetting $target
}
# usage:
# snap_olderthan snapname minutes
# returns true (0) if snap older than given number of minutes,
# based on 'creation' property.
# (technically this should work on regular fs as well)
#
# **NOTE**
# MINUTES, not seconds, because property format looks like this;
# $ zfs get -H creation scratch
# scratch creation Sun Dec 1 21:55 2019 -
#
# Arguments: $1 - snapshot (or filesystem) name, $2 - age limit in minutes
# Returns:   0 when the object's creation stamp is earlier than
#            "$2 minutes ago"; 1 otherwise.
# Note:      relies on printf "%(%s)T" turning a date string into seconds.
snap_olderthan(){
	typeset created cutoff

	created=$(printf "%(%s)T" "$($ZFSGETVAL creation $1)")
	cutoff=$(printf "%(%s)T" "$2 minutes ago")

	(( $cutoff < 1 )) &&
		zrep_errquit ERROR: snap_olderthan passed invalid minute limit: $2

	# Created before the cutoff means "older than".
	if (( created < cutoff )) ; then
		return 0
	fi
	return 1
}
# _expire:
# get rid of "old" snapshots for a specifically named filesystem
#
# Note0: you must hold local(master) fs lock first
#
# Note1: expire BOTH SIDES, if we are master
# Keep in mind that sometimes master and dest are on same system
#
# Note2: Be sure to NEVER delete most recent sent snapshot!!
# INTERNAL routine. For external-facing routine, see zrep_expire
# Usage: _expire [-L] fs
#   -L  expire locally only; do not go on to the remote side
# Reads ZREP_SKIP_EXPIRE, ZREP_RUNDIR, ZREPTAG, ZFSGETLVAL, ZFSGETVAL, ZREP_PATH.
# Builds a candidate list in a scratch file, trims it so that "savecount"
# snapshots survive, and destroys the rest with "zfs destroy -r".
_expire(){
if [[ "$ZREP_SKIP_EXPIRE" != "" ]] ; then
_debugprint _expire doing nothing since ZREP_SKIP_EXPIRE set
return
fi
typeset savecount currcount lastsent remotehost remotefs sanity
# scratch file holding the list of snapshots under consideration
typeset tmpfile=$ZREP_RUNDIR/zrep_expire.$$
typeset local=0 master
if [[ "$1" == "-L" ]] ; then
local=1;
shift
fi
# locally-set master flag decides which expiry rules apply below
master=`$ZFSGETLVAL ${ZREPTAG}:master $1`
# the caller, not this routine, is responsible for taking the lock
zrep_has_fs_lock $1 || zrep_errquit zrep_expire Internal Err caller did not hold fs lock on $1
# Allow propagated values as well as local, for savecount
savecount=`$ZFSGETVAL ${ZREPTAG}:savecount $1`
# do not use (()) in case value unset
# NOTE: inside [[ ]] the < operator is a STRING comparison, not numeric
if [[ $savecount < 1 ]] ; then
zrep_errquit ${ZREPTAG}:savecount on $1 set to improper value $savecount
fi
rm -f $tmpfile
if [[ "$master" == "yes" ]] ; then
lastsent=`getlastsnapsent $1`
if [[ "$lastsent" == "" ]] ; then
zrep_errquit corrupted zrep data: no last sent detected. Stopping expire
fi
# Note that getallsnaps does an explicit sort already.
# We want to not expire lastsent, OR any later unsent ones!
# awk copies lines until the first one equal to $lastsent, then stops,
# so lastsent and everything listed after it never reach the file.
getallsnaps $1 | awk '$1 == "'$lastsent'"{exit} {print}' >$tmpfile
# NOTE(review): presumably one less because lastsent itself, excluded
# above, already counts as one of the kept snapshots - confirm
savecount=$((savecount-1))
else
getallsnaps $1 >$tmpfile
fi
currcount=`wc -l < $tmpfile`
if ((currcount > savecount )) ; then
# from here on currcount is the number of snapshots to destroy
currcount=$((currcount - savecount))
# keep only the FIRST currcount lines of the list as victims
# (assumes getallsnaps lists oldest first - TODO confirm)
head -$currcount $tmpfile >$tmpfile.2
mv $tmpfile.2 $tmpfile
for snap in `cat $tmpfile` ; do
_debugprint expiring $snap
# Paranoia is good.
# Only names containing "@" are ever passed to zfs destroy.
case $snap in
*@*)
zfs destroy -r $snap
;;
*)
zrep_errquit "Expire was about to destroy NON-snapshot $snap"
;;
esac
done
fi
rm $tmpfile
if [[ "$master" != "yes" ]] || ((local ==1)) ; then
#This fs is dest fs. We are done.
# (also taken when -L was given, even on a master)
return
#otherwise, go expire on remote side as well
fi
remotehost=`$ZFSGETVAL ${ZREPTAG}:dest-host $1`
remotefs=`$ZFSGETVAL ${ZREPTAG}:dest-fs $1`
echo Also running expire on $remotehost:$remotefs now...
# ask the remote side whether it ALSO considers itself master
sanity=`zrep_ssh $remotehost $ZFSGETLVAL ${ZREPTAG}:master $remotefs`
# Normally, dont quit on error. But this is super-bad.
if [[ "$sanity" == "yes" ]] ; then
_errprint "ERROR: Remote side $remotehost also marked as master."
zrep_errquit "ERROR: Split brain scenario detected"
fi
# a failed remote expire is reported but does not stop anything
zrep_ssh $remotehost "$ZREP_PATH expire $remotefs" ||echo REMOTE expire failed
}
# top-level user-facing routine.
# expire old snaps for some or all zrep filesystems.
# Different ways of calling:
# zrep expire all Run expire on all zrep fs
# zrep expire Run expire on zrep fs we are master for, plus remote
# zrep expire -L Run expire on zrep fs we are master for. SKIP remote
# zrep expire fs .. Run expire only on fs, plus remote if it is a master
# zrep expire -L fs Run expire only on fs. Skip remote
#
# If no arg given, expire only filesystems we are master for
# If "all" given, expire literally all.
#
# Arguments: [-L] [all | fs ...]
#   -L    skip the remote side (passed through to _expire)
#   all   every filesystem printed by zrep_list
#   none  every filesystem printed by zrep_list_master
# Globals:   ZREP_SKIP_EXPIRE (read) - refusing to run if it is set
# Effect:    locks each filesystem, runs _expire on it, unlocks it.
#            A filesystem that cannot be locked is reported and skipped.
zrep_expire()
{
	if [[ "$ZREP_SKIP_EXPIRE" != "" ]] ; then
		zrep_errquit 'You explicitly called zrep expire.. and also have ZREP_SKIP_EXPIRE set?? How about no.'
	fi

	typeset local
	if [[ "$1" == "-L" ]] ; then
		local="-L"
		shift
	fi

	if [[ "$1" == "all" ]] ; then
		set -- `zrep_list`
	elif [[ "$1" == "" ]] ; then
		set -- `zrep_list_master`
	fi

	# Note: we should continue if we hit problems with an individual
	# filesystem. Otherwise we risk letting the server fill up and
	# self-destruct over one troublesome filesystem.
	while [[ "$1" != "" ]] ; do
		# Without the lock we must neither expire nor unlock: _expire
		# insists on the lock being held, and releasing a lock we never
		# acquired is not ours to do.
		if ! zrep_lock_fs $1 ; then
			echo WARNING: could not lock $1. Skipping expire
			shift
			continue
		fi

		echo Expiring zrep snaps on $1
		_expire $local $1 || echo WARNING: expire failed for $1
		zrep_unlock_fs $1
		shift
	done
}