Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dell] S6000 I2C not responding to certain optics #8736

Merged
merged 3 commits into from
Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 101 additions & 1 deletion device/dell/x86_64-dell_s6000_s1220-r0/plugins/sfputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
try:
import time
import datetime
import fcntl
from sonic_sfp.sfputilbase import SfpUtilBase
except ImportError as e:
raise ImportError("%s - required module not found" % str(e))
Expand All @@ -19,6 +20,7 @@ class SfpUtil(SfpUtilBase):
PORTS_IN_BLOCK = 32

EEPROM_OFFSET = 20
SFP_LOCK_FILE="/etc/sonic/sfp_lock"

_port_to_eeprom_mapping = {}
port_dict = {}
Expand Down Expand Up @@ -72,10 +74,19 @@ def get_presence(self, port_num):
if port_num < self.port_start or port_num > self.port_end:
return False

try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is qsfp_modprs pin access needs a lock? Reading of sysfs entry shouldn't need lock. I see this https://github.com/Azure/sonic-buildimage/blob/master/platform/broadcom/sonic-platform-modules-dell/s6000/modules/dell_s6000_platform.c#L286 already thread safe.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lock is provided for writing qsfp_modsel register.
When multiple ports are enabled via qsfp_modsel register, we observed that qsfp_modprs values are being wrong. At a time only one port should be enabled in qsfp_modsel.

fd = open(self.SFP_LOCK_FILE, "r")
except IOError as e:
print("Error: unable to open file: "+ str(e))
return False
fcntl.flock(fd, fcntl.LOCK_EX)
self.set_modsel(port_num)

try:
reg_file = open("/sys/devices/platform/dell-s6000-cpld.0/qsfp_modprs")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
fcntl.flock(fd, fcntl.LOCK_UN)
return False

content = reg_file.readline().rstrip()
Expand All @@ -85,13 +96,102 @@ def get_presence(self, port_num):

# Mask off the bit corresponding to our port
mask = (1 << port_num)

fcntl.flock(fd, fcntl.LOCK_UN)
# ModPrsL is active low
if reg_value & mask == 0:
return True

return False

def get_modsel(self, port_num):
# Check for invalid port_num
if port_num < self.port_start or port_num > self.port_end:
return False

try:
reg_file = open("/sys/devices/platform/dell-s6000-cpld.0/qsfp_modsel")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
return False

content = reg_file.readline().rstrip()

# content is a string containing the hex representation of the register
reg_value = int(content, 16)

# Mask off the bit corresponding to our port
mask = (1 << port_num)

if reg_value & mask == 1:
return False

return True

def set_modsel(self, port_num):
# Check for invalid port_num
if port_num < self.port_start or port_num > self.port_end:
return False

try:
reg_file = open("/sys/devices/platform/dell-s6000-cpld.0/qsfp_modsel", "r+")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
return False

content = reg_file.readline().rstrip()

# content is a string containing the hex representation of the register
reg_value = int(content, 16)

# Mask off the bit corresponding to our port
mask = (1 << port_num)
reg_value = reg_value | int("0xffffffff", 16)
reg_value = reg_value & ~mask

# Convert our register value back to a hex string and write back
content = hex(reg_value)

reg_file.seek(0)
reg_file.write(content)
reg_file.close()

return True

def get_eeprom_raw(self, port_num, num_bytes=256):
# Read interface id EEPROM at addr 0x50
try:
fd = open(self.SFP_LOCK_FILE, "r")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
return None
fcntl.flock(fd, fcntl.LOCK_EX)
self.set_modsel(port_num)
eeprom_bytes = self._read_eeprom_devid(port_num, self.IDENTITY_EEPROM_ADDR, 0, num_bytes)
fcntl.flock(fd, fcntl.LOCK_UN)
return eeprom_bytes

def get_eeprom_dom_raw(self, port_num):
if port_num in self.osfp_ports:
return None
if port_num in self.qsfp_ports:
# QSFP DOM EEPROM is also at addr 0x50 and thus also stored in eeprom_ifraw
return None
else:
# Read dom eeprom at addr 0x51
if not self.get_modsel(port_num):
try:
fd = open(self.SFP_LOCK_FILE, "r")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
return None
fcntl.flock(fd, fcntl.LOCK_EX)
self.set_modsel(port_num)
eeprom_bytes = self._read_eeprom_devid(port_num, self.DOM_EEPROM_ADDR, 0)
fcntl.flock(fd, fcntl.LOCK_UN)
return eeprom_bytes
else:
return self._read_eeprom_devid(port_num, self.DOM_EEPROM_ADDR, 0)

def get_low_power_mode(self, port_num):
# Check for invalid port_num
if port_num < self.port_start or port_num > self.port_end:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,25 @@ static ssize_t get_modsel(struct device *dev, struct device_attribute *devattr,
return sprintf(buf, "0x%08x\n", data);
}

static ssize_t set_modsel(struct device *dev, struct device_attribute *devattr, const char *buf, size_t count)
{
int err;
unsigned long data = 0;
struct cpld_platform_data *pdata = dev->platform_data;

err = kstrtoul(buf, 16, &data);
if (err)
return err;

dell_i2c_smbus_write_byte_data(pdata[slave_cpld].client, 0x0, (u8)(data & 0xff));
dell_i2c_smbus_write_byte_data(pdata[slave_cpld].client, 0x1, (u8)((data >> 8) & 0xff));
dell_i2c_smbus_write_byte_data(pdata[master_cpld].client, 0xa, (u8)((data >> 16) & 0xff));
dell_i2c_smbus_write_byte_data(pdata[master_cpld].client, 0xb, (u8)((data >> 24) & 0xff));

msleep(2); // As per HW spec
return count;
}

static ssize_t get_lpmode(struct device *dev, struct device_attribute *devattr, char *buf)
{
int ret;
Expand Down Expand Up @@ -1128,7 +1147,7 @@ static ssize_t get_reboot_reason(struct device *dev,
return sprintf(buf, "0x%x\n", data);
}

static DEVICE_ATTR(qsfp_modsel, S_IRUGO, get_modsel, NULL);
static DEVICE_ATTR(qsfp_modsel, S_IRUGO | S_IWUSR, get_modsel, set_modsel);
static DEVICE_ATTR(qsfp_modprs, S_IRUGO, get_modprs, NULL);
static DEVICE_ATTR(qsfp_lpmode, S_IRUGO | S_IWUSR, get_lpmode, set_lpmode);
static DEVICE_ATTR(qsfp_reset, S_IRUGO | S_IWUSR, get_reset, set_reset);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ remove_python_api_package() {
# read SONiC immutable variables
[ -f /etc/sonic/sonic-environment ] && . /etc/sonic/sonic-environment

if [ ! -e /etc/sonic/sfp_lock ]; then
touch /etc/sonic/sfp_lock
fi

if [[ "$1" == "init" ]]; then
depmod -a
modprobe nvram
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import re
import struct
import time
import fcntl
from sonic_platform_base.sfp_base import SfpBase
from sonic_platform_base.sonic_sfp.sff8436 import sff8436InterfaceId
from sonic_platform_base.sonic_sfp.sff8436 import sff8436Dom
Expand Down Expand Up @@ -154,6 +155,15 @@ def _get_eeprom_data(self, eeprom_key):
if (self.sfpInfo is None):
return None

SFP_LOCK_FILE="/etc/sonic/sfp_lock"
prgeor marked this conversation as resolved.
Show resolved Hide resolved
try:
fd = open(SFP_LOCK_FILE, "r")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
return None
fcntl.flock(fd, fcntl.LOCK_EX)
self.set_modsel()

page_offset = sff8436_parser[eeprom_key][PAGE_OFFSET]
eeprom_data_raw = self._read_eeprom_bytes(
self.eeprom_path,
Expand All @@ -172,6 +182,7 @@ def _get_eeprom_data(self, eeprom_key):
self.sfpDomInfo, sff8436_parser[eeprom_key][FUNC_NAME])(
eeprom_data_raw, 0)

fcntl.flock(fd, fcntl.LOCK_UN)
return eeprom_data

def _strip_unit_from_str(self, value_str):
Expand Down Expand Up @@ -423,6 +434,16 @@ def get_presence(self):
Retrieves the presence of the sfp
"""
presence_ctrl = self.sfp_control + 'qsfp_modprs'
SFP_LOCK_FILE="/etc/sonic/sfp_lock"

try:
fd = open(SFP_LOCK_FILE, "r")
except IOError as e:
print("Error: unable to open file: %s" % str(e))
return False
fcntl.flock(fd, fcntl.LOCK_EX)
self.set_modsel()

try:
reg_file = open(presence_ctrl)
except IOError as e:
Expand All @@ -437,12 +458,69 @@ def get_presence(self):
# Mask off the bit corresponding to our port
mask = (1 << self.sfp_ctrl_idx)

fcntl.flock(fd, fcntl.LOCK_UN)

# ModPrsL is active low
if ((reg_value & mask) == 0):
return True

return False

def get_modsel(self):
modsel_ctrl = self.sfp_control + 'qsfp_modsel'
try:
reg_file = open(modsel_ctrl, "r+")
except IOError as e:
return False

reg_hex = reg_file.readline().rstrip()

# content is a string containing the hex
# representation of the register
reg_value = int(reg_hex, 16)

# Mask off the bit corresponding to our port
index = self.sfp_ctrl_idx

mask = (1 << index)

if ((reg_value & mask) == 1):
modsel_state = False
else:
modsel_state = True

return modsel_state

def set_modsel(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you bring the lock inside set_modsel(), then you don't have to lock/unlock every place where set_modsel() is called, keeps code changes to minimum.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lock is based on set_modsel() but we cant introduce it here.
A scenario:

  1. When sfputil tries to read a port, it checks for modprs register and to read that we are setting modsel register.
  2. Before we actually reading modprs register, if pmon docker or any other process trying to set modsel register then it will have the race condition here.
  3. To avoid this we are locking before modsel register is being set and releasing the lock only after the requirements are over(In our case after retriving modprs register).

modsel_ctrl = self.sfp_control + 'qsfp_modsel'
try:
reg_file = open(modsel_ctrl, "r+")
except IOError as e:
return False

reg_hex = reg_file.readline().rstrip()

# content is a string containing the hex
# representation of the register
reg_value = int(reg_hex, 16)

# Mask off the bit corresponding to our port
index = self.sfp_ctrl_idx

reg_value = reg_value | int("0xffffffff", 16)
mask = (1 << index)

reg_value = (reg_value & ~mask)

# Convert our register value back to a hex string and write back
content = hex(reg_value)

reg_file.seek(0)
reg_file.write(content)
reg_file.close()

return True

def get_model(self):
"""
Retrieves the model number (or part number) of the sfp
Expand Down