Merge pull request #9 from no92/misc-fixes

Miscellaneous fixes
managarm · Oct 21, 2023 · 310ee00 · 310ee00
2 parents 4837d8e + f7854a7
commit 310ee00
Show file tree

Hide file tree

Showing 9 changed files with 93 additions and 33 deletions.
diff --git a/src/coffee_lake/dp.c b/src/coffee_lake/dp.c
@@ -41,23 +41,11 @@
 #define DDI_AUX_NATIVE_WRITE 0x8
 #define DDI_AUX_NATIVE_READ 0x9
 
-#define DPCD_REV 0x0
-#define DPCD_MAX_LINK_RATE 0x1
-#define DPCD_MAX_LANE_COUNT 0x2
-#define DPCD_MAX_DOWNSPREAD 0x3
 #define NO_AUX_HANDSHAKE_LINK_TRAINING (1 << 6)
 
-#define DPCD_DOWNSTREAMPORT_PRESENT 0x5
-#define DPCD_EDP_CONFIGURATION_CAP 0xD
-#define DPCD_DOWNSTREAM_PORT0_CAP 0x80
-
-#define DPCD_TRAIN_PATTERN 0x102
-
-#define DPCD_SET_POWER 0x600
 #define DPCD_POWER_D0 1
 #define DPCD_POWER_D3 2
 
-
 #define DP_TP_CTL(c) (0x64040 + ((c) * 0x100))
 #define DP_TP_STS(c) (0x64044 + ((c) * 0x100))
 
@@ -354,7 +342,17 @@ uint8_t dp_aux_native_read(struct LilGpu* gpu, uint16_t addr) {
     return res.data[0];
 }
 
-static void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) {
+void dp_aux_native_readn(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf) { // Sends a DDI_AUX_NATIVE_READ request for n bytes at addr and copies the response data into buf
+    AuxRequest req = {0};
+    req.request = DDI_AUX_NATIVE_READ;
+    req.address = addr;
+    req.size = n; // NOTE(review): n is stored unchecked; confirm AuxRequest.size can hold every n callers pass
+    AuxResponse res = dp_aux_cmd(gpu, req);
+
+    memcpy(buf, res.data, n); // always copies n bytes; NOTE(review): nothing here bounds n by the capacity of res.data or inspects the response for failure - TODO confirm callers guarantee both
+}
+
+void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) {
     AuxRequest req = {0};
     req.request = DDI_AUX_NATIVE_WRITE;
     req.address = addr;
@@ -363,6 +361,15 @@ static void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) {
     dp_aux_cmd(gpu, req);
 }
 
+void dp_aux_native_writen(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf) { // Sends a DDI_AUX_NATIVE_WRITE request carrying the n bytes at buf to addr
+    AuxRequest req = {0};
+    req.request = DDI_AUX_NATIVE_WRITE;
+    req.address = addr;
+    req.size = n;
+    memcpy(req.tx, buf, n); // buf is only read; NOTE(review): n is not bounded by the capacity of req.tx - TODO confirm callers keep it in range
+    dp_aux_cmd(gpu, req); // the returned response is discarded, so a failed write is not reported to the caller
+}
+
 #define DDC_SEGMENT 0x30
 #define DDC_ADDR 0x50
 #define EDID_SIZE 128
@@ -512,15 +519,15 @@ static void dp_set_sink_power(struct LilGpu* gpu, struct LilConnector* connector
     if(on) {
         lil_panic("TODO: Turn Sink on");
     } else {
-        uint8_t downstream = dp_aux_native_read(gpu, DPCD_DOWNSTREAMPORT_PRESENT);
+        uint8_t downstream = dp_aux_native_read(gpu, DOWNSTREAMPORT_PRESENT);
         if(rev == 0x11 && (downstream & 1)) {
-            uint8_t port0 = dp_aux_native_read(gpu, DPCD_DOWNSTREAM_PORT0_CAP);
+            uint8_t port0 = dp_aux_native_read(gpu, DOWNSTREAM_PORT0_CAP);
             if(port0 & (1 << 3)) { // HPD Aware
                 return;
             }
         }
 
-        dp_aux_native_write(gpu, DPCD_SET_POWER, DPCD_POWER_D3);
+        dp_aux_native_write(gpu, SET_POWER, DPCD_POWER_D3);
     }
 }
 
@@ -548,7 +555,7 @@ void lil_cfl_dp_init(struct LilGpu* gpu, struct LilConnector* connector) {
 
     *cstate &= ~0x3; // Disable DC5 and DC6 state*/
 
-    uint8_t cap = dp_aux_native_read(gpu, DPCD_EDP_CONFIGURATION_CAP);
+    uint8_t cap = dp_aux_native_read(gpu, EDP_CONFIGURATION_CAP);
     connector->type = (cap != 0) ? EDP : DISPLAYPORT; // Hacky, but it should work on any eDP display that is semi-modern, better option is to parse VBIOS
 
     edp_panel_on(gpu, connector);
@@ -682,7 +689,7 @@ void lil_cfl_dp_pre_enable(struct LilGpu* gpu, struct LilConnector* connector) {
     v |= DPLL_CTRL1_PROGRAM_ENABLE(dpll);
     v &= ~DPLL_CTRL1_HDMI_MODE(dpll); // DP mode
     v &= ~DPLL_CTRL1_LINK_RATE_MASK(dpll);
-    v |= DPLL_CTRL1_LINK_RATE(dpll, dp_aux_native_read(gpu, DPCD_MAX_LINK_RATE));
+    v |= DPLL_CTRL1_LINK_RATE(dpll, dp_aux_native_read(gpu, MAX_LINK_RATE));
     *dpll_ctrl1 = v;
     (void)*dpll_ctrl1;
 
@@ -720,7 +727,7 @@ void lil_cfl_dp_pre_enable(struct LilGpu* gpu, struct LilConnector* connector) {
 
     lil_sleep(5);
 
-    if(dp_aux_native_read(gpu, DPCD_REV) == 0x11 && dp_aux_native_read(gpu, DPCD_MAX_DOWNSPREAD) & NO_AUX_HANDSHAKE_LINK_TRAINING) {
+    if(dp_aux_native_read(gpu, DPCD_REV) == 0x11 && dp_aux_native_read(gpu, MAX_DOWNSPREAD) & NO_AUX_HANDSHAKE_LINK_TRAINING) {
         lil_sleep(2);
         v = *dp_tp_ctl;
         v &= ~DP_TP_CTL_TRAIN_MASK;
@@ -774,7 +781,7 @@ LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode) {
 
     uint64_t m = 3 * mode->bpc * mode->clock * 1000;
 
-    uint8_t link_rate = dp_aux_native_read(gpu, DPCD_MAX_LINK_RATE);
+    uint8_t link_rate = dp_aux_native_read(gpu, MAX_LINK_RATE);
     uint64_t symbol_rate = 0;
     if(link_rate == 0x6)
         symbol_rate = 162000000;
@@ -785,7 +792,7 @@ LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode) {
     else
         lil_panic("Unknown DP Link Speed");
 
-    uint64_t n = 8 * symbol_rate * (dp_aux_native_read(gpu, DPCD_MAX_LANE_COUNT) & 0xF);
+    uint64_t n = 8 * symbol_rate * (dp_aux_native_read(gpu, MAX_LANE_COUNT) & 0xF);
     cancel_m_n(&m, &n, DATA_N_MAX);
     ret.data_m = m;
     ret.data_n = n;

diff --git a/src/coffee_lake/dp.h b/src/coffee_lake/dp.h
@@ -2,6 +2,28 @@
 
 #include <lil/intel.h>
 
+enum DPCD_ADDRESSES { // register addresses handed to the dp_aux_native_* accessors declared further down in this header
+	DPCD_REV = 0x0,
+	MAX_LINK_RATE = 0x1, // NOTE(review): from here on the enumerators carry no DPCD_ prefix, so these generic names are visible to every file that includes this header
+	MAX_LANE_COUNT = 0x2,
+	MAX_DOWNSPREAD = 0x3,
+	DOWNSTREAMPORT_PRESENT = 0x5,
+	EDP_CONFIGURATION_CAP = 0xD,
+	TRAINING_AUX_RD_INTERVAL = 0xE,
+	DOWNSTREAM_PORT0_CAP = 0x80,
+	LINK_BW_SET = 0x100,
+	LANE_COUNT_SET = 0x101,
+	TRAINING_PATTERN_SET = 0x102,
+	TRAINING_LANE0_SET = 0x103,
+	LINK_RATE_SET = 0x115,
+	DP_LANE0_1_STATUS = 0x202,
+	DP_LANE2_3_STATUS = 0x203,
+	LANE_ALIGN_STATUS_UPDATED = 0x204,
+	ADJUST_REQUEST_LANE0_1 = 0x206,
+	SET_POWER = 0x600,
+	EDP_DPCD_REV = 0x700,
+};
+
 void lil_cfl_dp_get_mode_info(LilGpu* gpu, LilModeInfo* out);
 
 bool lil_cfl_dp_is_connected (struct LilGpu* gpu, struct LilConnector* connector);
@@ -23,3 +45,6 @@ typedef struct {
 LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode);
 
 uint8_t dp_aux_native_read(struct LilGpu* gpu, uint16_t addr);
+void dp_aux_native_readn(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf);
+void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v);
+void dp_aux_native_writen(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf);
diff --git a/src/coffee_lake/gtt.c b/src/coffee_lake/gtt.c
@@ -1,17 +1,22 @@
-#include "gtt.h"
-
 #include <lil/imports.h>
 
+#include "src/helpers.h"
+#include "src/coffee_lake/gtt.h"
+
+#define GTT_HAW 39
+
 void lil_cfl_vmem_clear(LilGpu* gpu) { // Zeroes the 64-bit GTT entries with indices 0 .. (gpu->gtt_size >> 12) - 1
     for (size_t i = 0; i < (gpu->gtt_size >> 12); i++) { // NOTE(review): the bound treats gtt_size >> 12 as the entry count - confirm the unit of gtt_size
-        volatile uint64_t* gtt = (uint64_t*)(gpu->gtt_address + i * 8);
-        *gtt = 0;
+        GTT64_ENTRY(gpu, i << 12) = 0; // GTT64_ENTRY shifts its address argument right by 12, so i << 12 selects entry i
     }
 }
 
 void lil_cfl_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr) { // Points the 64-bit GTT entry covering gpu_addr at the host page `host` and marks it present; panics on an unsupported host address
-    if ((host & ~0xFFFFFFFFFF) != 0)
-        lil_panic("Coffee Lake GPUs only supports 40-bit host addresses"); // TODO: Servers support 46-bits (At least according to the Skylake PRMs) support this
+    // TODO: Servers support 46-bits (At least according to the Skylake PRMs); support this
+    uint64_t mask = ((1ULL << GTT_HAW) - 1) & ~0xFFFULL; // page-frame bits 12 .. GTT_HAW-1; ULL keeps the shift by GTT_HAW defined where unsigned long is only 32 bits wide
+
+    if ((host & ~mask) != 0) // rejects bits at or above GTT_HAW and also any of the low 12 bits, i.e. a non-page-aligned host address
+        lil_panic("Coffee Lake GPUs only support " STRINGIFY(GTT_HAW) "-bit host addresses");
 
-    *(volatile uint64_t*)(gpu->gtt_address + (gpu_addr / 0x1000) * 8) = host | 1; // Present
+    GTT64_ENTRY(gpu, gpu_addr) = (host & mask) | GTT_PAGE_PRESENT; // entry index is gpu_addr >> 12 (see GTT64_ENTRY)
 }
diff --git a/src/coffee_lake/gtt.h b/src/coffee_lake/gtt.h
@@ -2,5 +2,7 @@
 
 #include <lil/intel.h>
 
+#include "src/gtt.h"
+
 void lil_cfl_vmem_clear(LilGpu* gpu);
 void lil_cfl_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr);
diff --git a/src/gtt.h b/src/gtt.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#define GTT_PAGE_PRESENT 1 /* bit 0 of a GTT entry; ORed in when a page is mapped */
+
+#define GTT64_ENTRY(gpu, gpu_addr) (*(volatile uint64_t *) ((gpu)->gtt_address + ((gpu_addr) >> 12) * 8)) /* lvalue for the 8-byte entry with index gpu_addr >> 12; assumes gtt_address advances in bytes (integer or byte pointer) - TODO confirm */
+#define GTT32_ENTRY(gpu, gpu_addr) (*(volatile uint32_t *) ((gpu)->gtt_address + ((gpu_addr) >> 12) * 4)) /* same as GTT64_ENTRY but for 4-byte entries */
diff --git a/src/helpers.h b/src/helpers.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#define STRINGIFY(a) STRINGIFY_(a) /* extra level so that a macro argument is expanded before it is stringified, e.g. a numeric #define becomes its digits */
+#define STRINGIFY_(a) #a /* turns its argument into a string literal without expanding it */
diff --git a/src/ivy_bridge/gtt.c b/src/ivy_bridge/gtt.c
@@ -1,17 +1,17 @@
-#include "gtt.h"
-
 #include <lil/imports.h>
 
+#include "src/gtt.h"
+#include "src/ivy_bridge/gtt.h"
+
 void lil_ivb_vmem_clear(LilGpu* gpu) { // Zeroes the 32-bit GTT entries with indices 0 .. (gpu->gtt_size >> 12) - 1
     for (size_t i = 0; i < (gpu->gtt_size >> 12); i++) { // NOTE(review): the bound treats gtt_size >> 12 as the entry count - confirm the unit of gtt_size
-        volatile uint32_t* gtt = (uint32_t*)(gpu->gtt_address + i * 4);
-        *gtt = 0;
+        GTT32_ENTRY(gpu, i << 12) = 0; // GTT32_ENTRY shifts its address argument right by 12, so i << 12 selects entry i
     }
 }
 
 void lil_ivb_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr) { // Writes the 32-bit GTT entry covering gpu_addr for the host page `host`; panics if host has bits set above bit 39
     if ((host & ~0xFFFFFFFFFF) != 0)
         lil_panic("Ivy Bridge GPU only supports 40-bit host addresses");
 
-    *(volatile uint32_t*)(gpu->gtt_address + (gpu_addr / 0x1000) * 4) = host | (((host >> 32) & 0xFF) << 4) | 0b110 | 1; // MLC/LLC Caching, Present
+    GTT32_ENTRY(gpu, gpu_addr) = host | (((host >> 32) & 0xFF) << 4) | GTT_IVB_CACHE_MLC_LLC | GTT_PAGE_PRESENT; // host bits 32..39 land in entry bits 4..11 and the 32-bit store keeps only the low 32 bits; NOTE(review): the low 12 bits of host are not masked, so an unaligned host would alter those entry bits - confirm callers pass page-aligned addresses
 }
diff --git a/src/ivy_bridge/gtt.h b/src/ivy_bridge/gtt.h
@@ -2,5 +2,9 @@
 
 #include <lil/intel.h>
 
+#include "src/gtt.h"
+
+#define GTT_IVB_CACHE_MLC_LLC 0b110 /* MLC/LLC caching bits ORed into an Ivy Bridge GTT entry; note that 0b literals are a compiler extension before C23 */
+
 void lil_ivb_vmem_clear(LilGpu* gpu);
 void lil_ivb_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr);
diff --git a/src/pci.c b/src/pci.c
@@ -5,6 +5,10 @@
 #include <stddef.h>
 
 void lil_get_bar(void* device, int bar, uintptr_t* obase, uintptr_t* len) {
+	/* disable Bus Mastering and Memory + I/O space access */
+	uint16_t command = lil_pci_readw(device, 4);
+	lil_pci_writew(device, 4, command & ~7); /* Bus Master | Memory Space | I/O Space */
+
     size_t reg_index = 0x10 + bar * 4;
     uint64_t bar_low = lil_pci_readd(device, reg_index), bar_size_low;
     uint64_t bar_high = 0, bar_size_high = ~0;
@@ -35,4 +39,7 @@ void lil_get_bar(void* device, int bar, uintptr_t* obase, uintptr_t* len) {
 
     *obase = base;
     *len = size;
+
+	/* restore Bus Mastering and Memory + I/O space access */
+	lil_pci_writew(device, 4, command);
 }