Skip to content

Commit

Permalink
Merge pull request #27 from tenstorrent/23-arc-fw-not-yet-booted-afte…
Browse files Browse the repository at this point in the history
…r-resume-from-standby

Suspend/resume support
  • Loading branch information
alewycky-tenstorrent authored Aug 6, 2024
2 parents 8c5a16b + 25fd140 commit 892a08b
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 14 deletions.
2 changes: 2 additions & 0 deletions device.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ struct tenstorrent_device_class {
u32 instance_size;
bool (*init_device)(struct tenstorrent_device *ttdev);
bool (*init_hardware)(struct tenstorrent_device *ttdev);
bool (*post_hardware_init)(struct tenstorrent_device *ttdev);
void (*cleanup_hardware)(struct tenstorrent_device *ttdev);
void (*cleanup_device)(struct tenstorrent_device *ttdev);
void (*first_open_cb)(struct tenstorrent_device *ttdev);
void (*last_release_cb)(struct tenstorrent_device *ttdev);
Expand Down
31 changes: 30 additions & 1 deletion enumerate.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/version.h>
#include <linux/pm.h>

#include "enumerate.h"
#include "interrupt.h"
Expand Down Expand Up @@ -90,7 +91,8 @@ static int tenstorrent_pci_probe(struct pci_dev *dev, const struct pci_device_id
tt_dev->interrupt_enabled = tenstorrent_enable_interrupts(tt_dev);

if (device_class->init_device(tt_dev))
device_class->init_hardware(tt_dev);
if (device_class->init_hardware(tt_dev))
device_class->post_hardware_init(tt_dev);

pci_save_state(dev);

Expand Down Expand Up @@ -142,6 +144,7 @@ static void tt_dev_release(struct kref *tt_dev_kref) {
if (tt_dev->dev_class->reboot)
unregister_reboot_notifier(&tt_dev->reboot_notifier);

tt_dev->dev_class->cleanup_hardware(tt_dev);
tt_dev->dev_class->cleanup_device(tt_dev);

pci_disable_pcie_error_reporting(pdev);
Expand All @@ -155,13 +158,39 @@ void tenstorrent_device_put(struct tenstorrent_device *tt_dev) {
kref_put(&tt_dev->kref, tt_dev_release);
}

// Suspend callback: looks up the tenstorrent_device stored as the PCI driver
// data and runs its device class's cleanup_hardware hook.
// Always returns 0.
static int tenstorrent_suspend(struct device *dev) {
	struct tenstorrent_device *tt_dev = pci_get_drvdata(to_pci_dev(dev));

	tt_dev->dev_class->cleanup_hardware(tt_dev);
	return 0;
}

static int tenstorrent_resume(struct device *dev) {
struct pci_dev *pdev = to_pci_dev(dev);
struct tenstorrent_device *tt_dev = pci_get_drvdata(pdev);

int ret = tt_dev->dev_class->init_hardware(tt_dev);

// Suspend invalidates the saved state.
if (ret == 0)
pci_save_state(pdev);

return ret;
}

// Defines tenstorrent_pm_ops from the suspend/resume callbacks above.
// NOTE(review): SIMPLE_DEV_PM_OPS presumably only wires the callbacks in when
// CONFIG_PM_SLEEP is enabled - confirm against <linux/pm.h>.
static SIMPLE_DEV_PM_OPS(tenstorrent_pm_ops, tenstorrent_suspend, tenstorrent_resume);

// PCI device ID table; only declared here, the definition is not in this file view.
extern const struct pci_device_id tenstorrent_ids[];
// PCI driver descriptor for this module. The same handler is installed for
// both .remove and .shutdown, and the power-management callbacks are attached
// through the embedded struct device_driver (.driver.pm).
static struct pci_driver tenstorrent_pci_driver = {
.name = TENSTORRENT,
.id_table = tenstorrent_ids,
.probe = tenstorrent_pci_probe,
.remove = tenstorrent_pci_remove,
.shutdown = tenstorrent_pci_remove,

.driver.pm = &tenstorrent_pm_ops,
};

int tenstorrent_pci_register_driver(void)
Expand Down
17 changes: 14 additions & 3 deletions grayskull.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ static void grayskull_hwmon_init(struct grayskull_device *gs_dev) {
if (IS_ERR(hwmon_device))
goto grayskull_hwmon_init_err;

tt_dev->attributes = gs_attributes;

return;

Expand Down Expand Up @@ -831,18 +832,26 @@ static bool grayskull_init_hardware(struct tenstorrent_device *tt_dev) {

grayskull_noc_init(gs_dev);

grayskull_hwmon_init(gs_dev);
return true;
}

tt_dev->attributes = gs_attributes;
// post_hardware_init hook for Grayskull: runs hwmon initialization for the
// device. Always returns true.
static bool grayskull_post_hardware_init(struct tenstorrent_device *tt_dev) {
	grayskull_hwmon_init(tt_dev_to_gs_dev(tt_dev));
	return true;
}

static void grayskull_cleanup(struct tenstorrent_device *tt_dev) {
// cleanup_hardware hook for Grayskull: shuts the firmware down through the
// reset unit registers. Does nothing when reset_unit_regs is NULL.
static void grayskull_cleanup_hardware(struct tenstorrent_device *tt_dev) {
	struct grayskull_device *gs = tt_dev_to_gs_dev(tt_dev);

	if (gs->reset_unit_regs == NULL)
		return;

	grayskull_shutdown_firmware(tt_dev->pdev, gs->reset_unit_regs);
}

static void grayskull_cleanup(struct tenstorrent_device *tt_dev) {
struct grayskull_device *gs_dev = tt_dev_to_gs_dev(tt_dev);

if (gs_dev->reg_iomap != NULL)
pci_iounmap(gs_dev->tt.pdev, gs_dev->reg_iomap);
Expand All @@ -868,6 +877,8 @@ struct tenstorrent_device_class grayskull_class = {
.instance_size = sizeof(struct grayskull_device),
.init_device = grayskull_init,
.init_hardware = grayskull_init_hardware,
.post_hardware_init = grayskull_post_hardware_init,
.cleanup_hardware = grayskull_cleanup_hardware,
.cleanup_device = grayskull_cleanup,
.last_release_cb = grayskull_last_release_handler,
};
26 changes: 16 additions & 10 deletions wormhole.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ static void wormhole_hwmon_init(struct wormhole_device *wh_dev) {
if (IS_ERR(hwmon_device))
goto wormhole_hwmon_init_err;

tt_dev->attributes = wh_attributes;

return;

wormhole_hwmon_init_err:
Expand All @@ -179,17 +181,25 @@ static bool wormhole_init_hardware(struct tenstorrent_device *tt_dev) {
grayskull_send_arc_fw_message_with_args(reset_unit_regs(wh_dev), WH_FW_MSG_UPDATE_M3_AUTO_RESET_TIMEOUT, auto_reset_timeout, 0, 10000, NULL);
}

wormhole_hwmon_init(wh_dev);
return true;
}

tt_dev->attributes = wh_attributes;
// post_hardware_init hook for Wormhole: runs hwmon initialization for the
// device. Always returns true.
static bool wormhole_post_hardware_init(struct tenstorrent_device *tt_dev) {
	wormhole_hwmon_init(tt_dev_to_wh_dev(tt_dev));
	return true;
}

static void wormhole_cleanup(struct tenstorrent_device *tt_dev) {
// cleanup_hardware hook for Wormhole: shuts the firmware down through the
// device's reset unit registers.
static void wormhole_cleanup_hardware(struct tenstorrent_device *tt_dev) {
	grayskull_shutdown_firmware(tt_dev->pdev, reset_unit_regs(tt_dev_to_wh_dev(tt_dev)));
}

static void wormhole_cleanup(struct tenstorrent_device *tt_dev) {
struct wormhole_device *wh_dev = tt_dev_to_wh_dev(tt_dev);

if (wh_dev->bar2_mapping != NULL)
pci_iounmap(wh_dev->tt.pdev, wh_dev->bar2_mapping);
Expand All @@ -198,17 +208,13 @@ static void wormhole_cleanup(struct tenstorrent_device *tt_dev) {
pci_iounmap(wh_dev->tt.pdev, wh_dev->bar4_mapping);
}

// Reboot hook for Wormhole: shuts the firmware down through the device's
// reset unit registers.
// NOTE(review): the body is identical to wormhole_cleanup_hardware, and the
// class table also lists .reboot = wormhole_cleanup_hardware, so this looks
// like the removed side of the diff - confirm before relying on it.
static void wormhole_reboot(struct tenstorrent_device *tt_dev) {
struct wormhole_device *wh_dev = tt_dev_to_wh_dev(tt_dev);

grayskull_shutdown_firmware(tt_dev->pdev, reset_unit_regs(wh_dev));
}

// Device class descriptor for Wormhole: instance size plus the init, cleanup
// and reboot hooks used by the common driver code.
// NOTE(review): .reboot appears twice below; this looks like the old and new
// sides of a diff shown together (the later initializer would be the one
// that takes effect) - confirm against the actual file.
struct tenstorrent_device_class wormhole_class = {
.name = "Wormhole",
.instance_size = sizeof(struct wormhole_device),
.init_device = wormhole_init,
.init_hardware = wormhole_init_hardware,
.post_hardware_init = wormhole_post_hardware_init,
.cleanup_hardware = wormhole_cleanup_hardware,
.cleanup_device = wormhole_cleanup,
.reboot = wormhole_reboot,
.reboot = wormhole_cleanup_hardware,
};

0 comments on commit 892a08b

Please sign in to comment.