Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cephfs: Fix Removal of IPs from blocklist #4815

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/csi-addons/cephfs/network_fence.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (fcs *FenceControllerServer) UnfenceClusterNetwork(
return nil, status.Error(codes.Internal, err.Error())
}

err = nwFence.RemoveNetworkFence(ctx)
err = nwFence.RemoveClientEviction(ctx)
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to unfence CIDR block %q: %s", nwFence.Cidr, err.Error())
}
Expand Down
161 changes: 133 additions & 28 deletions internal/csi-addons/networkfence/fencing.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ type activeClient struct {
Inst string `json:"inst"`
}

// IPWithNonce represents the structure of an IP with nonce
// as listed by Ceph OSD blocklist.
type IPWithNonce struct {
IP string `json:"ip"`
Nonce string `json:"nonce"`
}

// NewNetworkFence returns a networkFence struct object from the Network fence/unfence request.
func NewNetworkFence(
ctx context.Context,
Expand Down Expand Up @@ -256,9 +263,7 @@ func (ac *activeClient) fetchID() (int, error) {
}

// AddClientEviction blocks access for all the IPs in the CIDR block
// using client eviction.
// blocks the active clients listed in cidr, and the IPs
// for whom there is no active client present too.
// using client eviction, it also blocks the entire CIDR.
func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
evictedIPs := make(map[string]bool)
// fetch active clients
Expand All @@ -269,13 +274,15 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
// iterate through CIDR blocks and check if any active client matches
for _, cidr := range nf.Cidr {
for _, client := range activeClients {
clientIP, err := client.fetchIP()
var clientIP string
clientIP, err = client.fetchIP()
if err != nil {
return fmt.Errorf("error fetching client IP: %w", err)
}
// check if the clientIP is in the CIDR block
if isIPInCIDR(ctx, clientIP, cidr) {
clientID, err := client.fetchID()
var clientID int
clientID, err = client.fetchID()
if err != nil {
return fmt.Errorf("error fetching client ID: %w", err)
}
Expand All @@ -291,25 +298,10 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error {
}
}

// blocklist the IPs in CIDR without any active clients
for _, cidr := range nf.Cidr {
// check if the CIDR is evicted
// fetch the list of IPs from a CIDR block
hosts, err := getIPRange(cidr)
if err != nil {
return fmt.Errorf("failed to convert CIDR block %s to corresponding IP range: %w", cidr, err)
}

// add ceph blocklist for each IP in the range mentioned by the CIDR
for _, host := range hosts {
if evictedIPs[host] {
continue
}
err = nf.addCephBlocklist(ctx, host, false)
if err != nil {
return err
}
}
// add the range based blocklist for CIDR
err = nf.AddNetworkFence(ctx)
if err != nil {
return err
}

return nil
Expand Down Expand Up @@ -358,7 +350,8 @@ func GetCIDR(cidrs Cidrs) ([]string, error) {
}

// removeCephBlocklist removes an IP from ceph osd blocklist.
func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useRange bool) error {
// the value of nonce is ignored if useRange is true.
func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip, nonce string, useRange bool) error {
arg := []string{
"--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile,
Expand All @@ -368,7 +361,15 @@ func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useR
if useRange {
cmd = append(cmd, "range")
}
cmd = append(cmd, "rm", ip)

// If nonce is not empty and we are not using
// range based blocks, we need to add the nonce
if nonce != "" && !useRange {
cmd = append(cmd, "rm", fmt.Sprintf("%s:0/%s", ip, nonce))
} else {
cmd = append(cmd, "rm", ip)
}

cmd = append(cmd, arg...)

_, stdErr, err := util.ExecCommand(ctx, "ceph", cmd...)
Expand Down Expand Up @@ -396,7 +397,7 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
// try range blocklist cmd, if invalid fallback to
// iterating through IP range.
if hasBlocklistRangeSupport {
err := nf.removeCephBlocklist(ctx, cidr, true)
err := nf.removeCephBlocklist(ctx, cidr, "", true)
if err == nil {
continue
}
Expand All @@ -412,7 +413,41 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {
}
// remove ceph blocklist for each IP in the range mentioned by the CIDR
for _, host := range hosts {
err := nf.removeCephBlocklist(ctx, host, false)
// 0 is used as nonce here to tell ceph
// to remove the blocklist entry matching: <host>:0/0
// it is same as telling ceph to remove just the IP
// without specifying any port or nonce with it.
err := nf.removeCephBlocklist(ctx, host, "0", false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@black-dragon74 Why is "0" used as nonce here ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to remove just the blacklist entry without specifying any extra details such as port and nonce. If you do not specify the port and nonce explicitly, ceph uses the default of 0/0 for port and nonce respectively.

Ex: ceph osd blocklist rm x.x.x.x, IP = x.x.x.x:0/0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to remove just the blacklist entry without specifying any extra details such as port and nonce. If you do not specify the port and nonce explicitly, ceph uses the default of 0/0 for port and nonce respectively.

Ex: ceph osd blocklist rm x.x.x.x, IP = x.x.x.x:0/0

Can you add it as comment just above this line ?
its a bit confusing why "0" pops as nonce all of a sudden.

if err != nil {
return err
}
}
}

return nil
}

func (nf *NetworkFence) RemoveClientEviction(ctx context.Context) error {
// Remove the CIDR block first
err := nf.RemoveNetworkFence(ctx)
if err != nil {
return err
}

// Get the ceph blocklist
blocklist, err := nf.getCephBlocklist(ctx)
if err != nil {
return err
}

// For each CIDR block, remove the IPs in the blocklist
// that fall under the CIDR with nonce
for _, cidr := range nf.Cidr {
hosts := nf.parseBlocklistForCIDR(ctx, blocklist, cidr)
log.DebugLog(ctx, "parsed blocklist for CIDR %s: %+v", cidr, hosts)

for _, host := range hosts {
err := nf.removeCephBlocklist(ctx, host.IP, host.Nonce, false)
if err != nil {
return err
}
Expand All @@ -421,3 +456,73 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error {

return nil
}

// getCephBlocklist fetches the ceph blocklist and returns it as a string.
func (nf *NetworkFence) getCephBlocklist(ctx context.Context) (string, error) {
arg := []string{
"--id", nf.cr.ID,
"--keyfile=" + nf.cr.KeyFile,
"-m", nf.Monitors,
}
// FIXME: replace the ceph command with go-ceph API in future
cmd := []string{"osd", "blocklist", "ls"}
cmd = append(cmd, arg...)
stdout, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...)
if err != nil {
return "", fmt.Errorf("failed to get the ceph blocklist: %w, stderr: %q", err, stdErr)
}

return stdout, nil
}

// parseBlocklistEntry parses a single entry from the ceph blocklist
// and returns the IPWithNonce.
func (nf *NetworkFence) parseBlocklistEntry(entry string) IPWithNonce {
parts := strings.Fields(entry)
if len(parts) == 0 {
return IPWithNonce{}
}

ipPortNonce := strings.SplitN(parts[0], "/", 2)
if len(ipPortNonce) != 2 {
return IPWithNonce{}
}

ipPort := ipPortNonce[0]
nonce := ipPortNonce[1]

lastColonIndex := strings.LastIndex(ipPortNonce[0], ":")
if lastColonIndex == -1 {
return IPWithNonce{}
}

if len(ipPort) <= lastColonIndex {
return IPWithNonce{}
}
ip := ipPort[:lastColonIndex]
Madhu-1 marked this conversation as resolved.
Show resolved Hide resolved

return IPWithNonce{IP: ip, Nonce: nonce}
}

// parseBlocklistForCIDR scans the blocklist for the given CIDR and returns
// the list of IPs that lie within the CIDR along with their nonce.
func (nf *NetworkFence) parseBlocklistForCIDR(ctx context.Context, blocklist, cidr string) []IPWithNonce {
blocklistEntries := strings.Split(blocklist, "\n")

matchingHosts := make([]IPWithNonce, 0)
for _, entry := range blocklistEntries {
entry = strings.TrimSpace(entry)

// Skip unrelated ranged blocks and invalid entries
if strings.Contains(entry, "cidr") || !strings.Contains(entry, "/") {
continue
}

blockedHost := nf.parseBlocklistEntry(entry)
if isIPInCIDR(ctx, blockedHost.IP, cidr) {
matchingHosts = append(matchingHosts, blockedHost)
}
}

return matchingHosts
}
119 changes: 119 additions & 0 deletions internal/csi-addons/networkfence/fencing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package networkfence

import (
"context"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -138,3 +139,121 @@ func TestFetchID(t *testing.T) {
})
}
}

func TestParseBlocklistEntry(t *testing.T) {
t.Parallel()

tests := []struct {
name string
input string
expected IPWithNonce
}{
{
name: "Valid IP and nonce",
input: "192.168.1.1:6789/abcdef123456",
expected: IPWithNonce{IP: "192.168.1.1", Nonce: "abcdef123456"},
},
{
name: "IPv6 address with full notation",
input: "2001:0db8:0000:0000:0000:8a2e:0370:7334:6789/abc123",
expected: IPWithNonce{IP: "2001:0db8:0000:0000:0000:8a2e:0370:7334", Nonce: "abc123"},
},
{
name: "IPv6 address with compressed zeros",
input: "2001:db8::1428:57ab:6789/def456",
expected: IPWithNonce{IP: "2001:db8::1428:57ab", Nonce: "def456"},
},
{
name: "IPv6 loopback address",
input: "::1:6789/ghi789",
expected: IPWithNonce{IP: "::1", Nonce: "ghi789"},
},
{
name: "IPv6 address with IPv4 mapping",
input: "::ffff:192.0.2.128:6789/jkl012",
expected: IPWithNonce{IP: "::ffff:192.0.2.128", Nonce: "jkl012"},
},
{
name: "IP without port",
input: "10.0.0.1/nonce123",
expected: IPWithNonce{},
},
{
name: "Extra whitespace",
input: " 172.16.0.1:1234/abc123 extra info ",
expected: IPWithNonce{IP: "172.16.0.1", Nonce: "abc123"},
},
}

nf := &NetworkFence{}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

result := nf.parseBlocklistEntry(tt.input)
require.Equal(t, tt.expected, result)
})
}
}

func TestParseBlocklistForCIDR(t *testing.T) {
t.Parallel()

testCases := []struct {
name string
blocklist string
cidr string
expected []IPWithNonce
}{
{
name: "Single IPv4 in CIDR",
blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000
listed 1 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{{IP: "192.168.1.1", Nonce: "1234567"}},
},
{
name: "Multiple IPv4 in CIDR",
blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000
192.168.1.2:0/7654321 expires 2023-07-01 11:00:00.000000
192.168.2.1:0/abcdefg expires 2023-07-01 12:00:00.000000
listed 3 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{
{IP: "192.168.1.1", Nonce: "1234567"},
{IP: "192.168.1.2", Nonce: "7654321"},
},
},
{
name: "IPv6 in CIDR",
blocklist: `2001:db8::1:0/fedcba expires 2023-07-01 10:00:00.000000
2001:db8::2:0/abcdef expires 2023-07-01 11:00:00.000000
listed 2 entries`,
cidr: "2001:db8::/64",
expected: []IPWithNonce{{IP: "2001:db8::1", Nonce: "fedcba"}, {IP: "2001:db8::2", Nonce: "abcdef"}},
},
{
name: "Empty blocklist",
blocklist: `listed 0 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{},
},
{
name: "No matching IPs",
blocklist: `10.0.0.1:0/1234567 expires 2023-07-01 10:00:00.000000
listed 1 entries`,
cidr: "192.168.1.0/24",
expected: []IPWithNonce{},
},
}

nf := &NetworkFence{}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

result := nf.parseBlocklistForCIDR(context.TODO(), tc.blocklist, tc.cidr)
require.Equal(t, tc.expected, result)
})
}
}