Skip to content

Commit

Permalink
Merge pull request #8177 from filecoin-project/fix/fault-tracker-snap…
Browse files Browse the repository at this point in the history
…-deals

fix:proving:post check sector handles snap deals replica faults
  • Loading branch information
ZenGround0 authored Feb 24, 2022
2 parents c940727 + abe04c3 commit d711775
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 58 deletions.
2 changes: 1 addition & 1 deletion api/api_storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ type StorageMiner interface {
// the path specified when calling CreateBackup is within the base path
CreateBackup(ctx context.Context, fpath string) error //perm:admin

CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, expensive bool) (map[abi.SectorNumber]string, error) //perm:admin
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, expensive bool) (map[abi.SectorNumber]string, error) //perm:admin

ComputeProof(ctx context.Context, ssi []builtin.ExtendedSectorInfo, rand abi.PoStRandomness, poStEpoch abi.ChainEpoch, nv abinetwork.Version) ([]builtin.PoStProof, error) //perm:read
}
Expand Down
8 changes: 4 additions & 4 deletions api/proxy_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion api/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ var (
FullAPIVersion0 = newVer(1, 5, 0)
FullAPIVersion1 = newVer(2, 2, 0)

MinerAPIVersion0 = newVer(1, 3, 0)
MinerAPIVersion0 = newVer(1, 4, 0)
WorkerAPIVersion0 = newVer(1, 5, 0)
)

Expand Down
Binary file modified build/openrpc/miner.json.gz
Binary file not shown.
4 changes: 3 additions & 1 deletion cmd/lotus-miner/proving.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ var provingCheckProvableCmd = &cli.Command{
}

var tocheck []storage.SectorRef
var update []bool
for _, info := range sectorInfos {
si := abi.SectorID{
Miner: abi.ActorID(mid),
Expand All @@ -454,9 +455,10 @@ var provingCheckProvableCmd = &cli.Command{
ProofType: info.SealProof,
ID: si,
})
update = append(update, info.SectorKeyCID != nil)
}

bad, err := sapi.CheckProvable(ctx, info.WindowPoStProofType, tocheck, cctx.Bool("slow"))
bad, err := sapi.CheckProvable(ctx, info.WindowPoStProofType, tocheck, update, cctx.Bool("slow"))
if err != nil {
return err
}
Expand Down
3 changes: 3 additions & 0 deletions documentation/en/api-v0-methods-miner.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ Inputs:
"ProofType": 8
}
],
[
true
],
true
]
```
Expand Down
96 changes: 50 additions & 46 deletions extern/sector-storage/faults.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ import (

// FaultTracker TODO: Track things more actively
type FaultTracker interface {
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error)
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error)
}

// CheckProvable returns unprovable sectors
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
var bad = make(map[abi.SectorID]string)

ssize, err := pp.SectorSize()
Expand All @@ -32,72 +32,76 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
}

// TODO: More better checks
for _, sector := range sectors {
for i, sector := range sectors {
err := func() error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var fReplica string
var fCache string

locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}

if !locked {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}

lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}

// temporary hack to make the check work with snapdeals
// will go away in https://github.com/filecoin-project/lotus/pull/7971
if lp.Sealed == "" || lp.Cache == "" {
// maybe it's update
if update[i] {
lockedUpdate, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}
if lockedUpdate {
lp, _, err = m.localStore.AcquireSector(ctx, sector, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
lp.Sealed, lp.Cache = lp.Update, lp.UpdateCache
if !lockedUpdate {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock on update replica", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}
lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector update replica in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
fReplica, fCache = lp.Update, lp.UpdateCache
} else {
locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}

if !locked {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}

lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
fReplica, fCache = lp.Sealed, lp.Cache

}

if lp.Sealed == "" || lp.Cache == "" {
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", lp.Cache, lp.Sealed)
if fReplica == "" || fCache == "" {
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", fReplica, "cache", fCache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", fCache, fReplica)
return nil
}

toCheck := map[string]int64{
lp.Sealed: 1,
filepath.Join(lp.Cache, "p_aux"): 0,
fReplica: 1,
filepath.Join(fCache, "p_aux"): 0,
}

addCachePathsForSectorSize(toCheck, lp.Cache, ssize)
addCachePathsForSectorSize(toCheck, fCache, ssize)

for p, sz := range toCheck {
st, err := os.Stat(p)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "err", err)
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", fReplica, "cache", fCache, "file", p, "err", err)
bad[sector.ID] = fmt.Sprintf("%s", err)
return nil
}

if sz != 0 {
if st.Size() != int64(ssize)*sz {
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", fReplica, "cache", fCache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
bad[sector.ID] = fmt.Sprintf("%s is wrong size (got %d, expect %d)", p, st.Size(), int64(ssize)*sz)
return nil
}
Expand All @@ -118,14 +122,14 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
sector.ID.Number,
})
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("generating fallback challenges: %s", err)
return nil
}

commr, err := rg(ctx, sector.ID)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("getting commR: %s", err)
return nil
}
Expand All @@ -136,12 +140,12 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
SectorNumber: sector.ID.Number,
SealedCID: commr,
},
CacheDirPath: lp.Cache,
CacheDirPath: fCache,
PoStProofType: wpp,
SealedSectorPath: lp.Sealed,
SealedSectorPath: fReplica,
}, ch.Challenges[sector.ID.Number])
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("generating vanilla proof: %s", err)
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion extern/sector-storage/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ func (mgr *SectorMgr) Remove(ctx context.Context, sector storage.SectorRef) erro
return nil
}

func (mgr *SectorMgr) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, ids []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (mgr *SectorMgr) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, ids []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
bad := map[abi.SectorID]string{}

for _, sid := range ids {
Expand Down
4 changes: 2 additions & 2 deletions node/impl/storminer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,7 @@ func (sm *StorageMinerAPI) CreateBackup(ctx context.Context, fpath string) error
return backup(ctx, sm.DS, fpath)
}

func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []sto.SectorRef, expensive bool) (map[abi.SectorNumber]string, error) {
func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []sto.SectorRef, update []bool, expensive bool) (map[abi.SectorNumber]string, error) {
var rg storiface.RGetter
if expensive {
rg = func(ctx context.Context, id abi.SectorID) (cid.Cid, error) {
Expand All @@ -1143,7 +1143,7 @@ func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredP
}
}

bad, err := sm.StorageMgr.CheckProvable(ctx, pp, sectors, rg)
bad, err := sm.StorageMgr.CheckProvable(ctx, pp, sectors, update, rg)
if err != nil {
return nil, err
}
Expand Down
4 changes: 3 additions & 1 deletion storage/wdpost_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B

sectors := make(map[abi.SectorNumber]struct{})
var tocheck []storage.SectorRef
var update []bool
for _, info := range sectorInfos {
sectors[info.SectorNumber] = struct{}{}
tocheck = append(tocheck, storage.SectorRef{
Expand All @@ -215,9 +216,10 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B
Number: info.SectorNumber,
},
})
update = append(update, info.SectorKeyCID != nil)
}

bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, nil)
bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, update, nil)
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("checking provable sectors: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion storage/wdpost_run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func (m mockVerif) GenerateWinningPoStSectorChallenge(context.Context, abi.Regis
type mockFaultTracker struct {
}

func (m mockFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (m mockFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
// Returns "bad" sectors so just return empty map meaning all sectors are good
return map[abi.SectorID]string{}, nil
}
Expand Down

0 comments on commit d711775

Please sign in to comment.