pcie.c (forked from tenstorrent/tt-kmd)

// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-License-Identifier: GPL-2.0-only

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "pcie.h"

#include <linux/delay.h>

#include "module.h"
#include "device.h"
#include "enumerate.h"
#include "grayskull.h"

#define FW_MSG_PCIE_RETRAIN 0xB6
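
// Poll the device's PCI_VENDOR_ID until it reads back as Tenstorrent or timeout_ms expires.
// A successful config read is the signal that the link is usable again after a reset.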
static bool poll_pcie_link_up(struct pci_dev *pdev, u32 timeout_ms) {
	u16 tt_vendor_id;
	ktime_t end_time = ktime_add_ms(ktime_get(), timeout_ms);

	pci_read_config_word(pdev, PCI_VENDOR_ID, &tt_vendor_id);
	while (tt_vendor_id != PCI_VENDOR_ID_TENSTORRENT) {
		if (ktime_after(ktime_get(), end_time)) {
			pr_debug("device timeout during link up.\n");
			return false;
		}
		pci_read_config_word(pdev, PCI_VENDOR_ID, &tt_vendor_id);
		msleep(100);
	}

	pr_debug("device link up successfully.\n");
	return true;
}
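
// Restore previously saved config space, but only if the device is answering config reads,
// then save the state again so a later restore remains possible.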
bool safe_pci_restore_state(struct pci_dev *pdev) {
	u16 vendor_id;

	if (!pdev->state_saved)
		return false;

	// Start with a test read. pci_restore_state calls pci_find_next_ext_capability which has
	// a bounded loop that is still long enough to trigger a soft lockup warning if hardware
	// is extremely misbehaving.
	if (pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor_id) != PCIBIOS_SUCCESSFUL
	    || vendor_id != PCI_VENDOR_ID_TENSTORRENT)
		return false;

	pci_restore_state(pdev);
	pci_save_state(pdev);
	return true;
}
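
// Hot reset the device by toggling the secondary bus reset bit in the upstream bridge's
// bridge control register, wait for the link to come back, then restore config space.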
bool pcie_hot_reset_and_restore_state(struct pci_dev *pdev) {
	struct pci_dev *bridge_dev = pci_upstream_bridge(pdev);
	u16 bridge_ctrl;

	if (!bridge_dev)
		return false;

	// reset link - like pci_reset_secondary_bus, but we don't want the full 1s delay.
	pci_read_config_word(bridge_dev, PCI_BRIDGE_CONTROL, &bridge_ctrl);
	pci_write_config_word(bridge_dev, PCI_BRIDGE_CONTROL, bridge_ctrl | PCI_BRIDGE_CTL_BUS_RESET);
	msleep(2);
	pci_write_config_word(bridge_dev, PCI_BRIDGE_CONTROL, bridge_ctrl);

	msleep(500);

	if (!poll_pcie_link_up(pdev, 10000))
		return false;

	if (!safe_pci_restore_state(pdev))
		return false;

	return true;
}
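
// Ask the ARC firmware to retrain the PCIe link at the upstream bridge's target link speed,
// hot resetting and retrying up to reset_limit times. Bit 15 of the message argument marks
// the final attempt.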
bool complete_pcie_init(struct tenstorrent_device *tt_dev, u8 __iomem* reset_unit_regs) {
	struct pci_dev *pdev = tt_dev->pdev;
	struct pci_dev *bridge_dev = pci_upstream_bridge(pdev);
	unsigned int i;

	if (!bridge_dev || reset_limit == 0)
		return true;

	for (i = 0; i < reset_limit; i++) {
		u16 target_link_speed;
		u16 subsys_vendor_id;
		u16 exit_code;
		bool last_retry = (i == reset_limit - 1);

		pcie_capability_read_word(bridge_dev, PCI_EXP_LNKCTL2, &target_link_speed);
		target_link_speed &= PCI_EXP_LNKCTL2_TLS;

		pci_read_config_word(bridge_dev, PCI_SUBSYSTEM_VENDOR_ID, &subsys_vendor_id);

		if (!grayskull_send_arc_fw_message_with_args(reset_unit_regs, FW_MSG_PCIE_RETRAIN,
							     target_link_speed | (last_retry << 15),
							     subsys_vendor_id, 200000, &exit_code))
			return false;

		if (exit_code == 0) {
			pr_debug("pcie init passed after %u iterations.\n", i);
			return true;
		} else {
			pr_debug("pcie init failed on iteration %u.\n", i);
			if (last_retry)
				return false;
		}

		pci_save_state(pdev);
		if (!pcie_hot_reset_and_restore_state(pdev))
			return false;
	}

	return false;
}