From 14410f9420184702fa4b0ee134a0aa37216398eb Mon Sep 17 00:00:00 2001 From: Ryan Zezeski Date: Thu, 23 Jun 2022 14:31:00 -0600 Subject: [PATCH] Want temporary hack to support inbound connections to guest (#167) --- illumos-ddi-dki/src/lib.rs | 21 +++ opte/src/engine/int_test.rs | 3 + opte/src/engine/mod.rs | 1 + opte/src/engine/nat.rs | 268 ++++++++++++++++++++++++++++ opte/src/oxide_vpc/engine/arp.rs | 43 ++++- opte/src/oxide_vpc/engine/mod.rs | 1 + opte/src/oxide_vpc/engine/nat4.rs | 61 +++++++ opte/src/oxide_vpc/engine/snat4.rs | 1 + opte/src/oxide_vpc/mod.rs | 2 + xde/src/xde.rs | 277 ++++++++++++++++++++++++----- xde/xde.conf | 8 + 11 files changed, 640 insertions(+), 46 deletions(-) create mode 100644 opte/src/engine/nat.rs create mode 100644 opte/src/oxide_vpc/engine/nat4.rs diff --git a/illumos-ddi-dki/src/lib.rs b/illumos-ddi-dki/src/lib.rs index 93d5e427..92047c41 100644 --- a/illumos-ddi-dki/src/lib.rs +++ b/illumos-ddi-dki/src/lib.rs @@ -326,6 +326,7 @@ pub const DDI_PROP_DONTPASS: c_uint = 0x0001; pub const DDI_PROP_CANSLEEP: c_uint = 0x0002; pub const DDI_PROP_SUCCESS: c_int = 0; +pub const DDI_PROP_NOT_FOUND: c_int = 1; pub const DDI_IPL_0: c_int = 0; @@ -413,7 +414,27 @@ extern "C" { level: c_int, ) -> *const ddi_periodic; pub fn ddi_periodic_delete(request: *const ddi_periodic); + pub fn ddi_prop_exists( + match_dev: dev_t, + dip: *mut dev_info, + flags: c_uint, + name: *const c_char, + ) -> c_int; pub fn ddi_prop_free(data: *mut c_void); + pub fn ddi_prop_get_int( + match_dev: dev_t, + dip: *mut dev_info, + flags: c_uint, + name: *const c_char, + defvalue: c_int, + ) -> c_int; + pub fn ddi_prop_get_int64( + match_dev: dev_t, + dip: *mut dev_info, + flags: c_uint, + name: *const c_char, + defvalue: i64, + ) -> i64; pub fn ddi_prop_lookup_string( match_dev: dev_t, dip: *mut dev_info, diff --git a/opte/src/engine/int_test.rs b/opte/src/engine/int_test.rs index 125b74c4..625a9382 100644 --- a/opte/src/engine/int_test.rs +++ 
b/opte/src/engine/int_test.rs @@ -130,6 +130,7 @@ fn lab_cfg() -> PortCfg { ]), vni: Vni::new(7777u32).unwrap(), }, + proxy_arp_enable: false, } } @@ -195,6 +196,7 @@ fn g1_cfg() -> PortCfg { ]), vni: Vni::new(7777u32).unwrap(), }, + proxy_arp_enable: false, } } @@ -226,6 +228,7 @@ fn g2_cfg() -> PortCfg { ]), vni: Vni::new(7777u32).unwrap(), }, + proxy_arp_enable: false, } } diff --git a/opte/src/engine/mod.rs b/opte/src/engine/mod.rs index f464cb6f..54baf5e6 100644 --- a/opte/src/engine/mod.rs +++ b/opte/src/engine/mod.rs @@ -23,6 +23,7 @@ pub mod ip4; #[macro_use] pub mod ip6; pub mod layer; +pub mod nat; #[macro_use] pub mod packet; pub mod port; diff --git a/opte/src/engine/nat.rs b/opte/src/engine/nat.rs new file mode 100644 index 00000000..5717be59 --- /dev/null +++ b/opte/src/engine/nat.rs @@ -0,0 +1,268 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2022 Oxide Computer Company + +use core::fmt; + +cfg_if! 
{ + if #[cfg(all(not(feature = "std"), not(test)))] { + use alloc::string::ToString; + use alloc::sync::Arc; + use alloc::vec::Vec; + } else { + use std::string::ToString; + use std::sync::Arc; + use std::vec::Vec; + } +} + +use super::ether::EtherMeta; +use super::ip4::Ipv4Meta; +use super::layer::InnerFlowId; +use super::port::meta::Meta; +use super::rule::{ + self, ActionDesc, AllowOrDeny, DataPredicate, Predicate, StatefulAction, HT, +}; +use crate::api::{Direction, Ipv4Addr, MacAddr}; + +#[derive(Clone)] +pub struct Nat4 { + priv_ip: Ipv4Addr, + public_ip: Ipv4Addr, +} + +impl Nat4 { + pub fn new(priv_ip: Ipv4Addr, public_ip: Ipv4Addr) -> Self { + Self { priv_ip: priv_ip.into(), public_ip: public_ip.into() } + } +} + +impl fmt::Display for Nat4 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} <=> {}", self.priv_ip, self.public_ip) + } +} + +impl StatefulAction for Nat4 { + fn gen_desc( + &self, + _flow_id: &InnerFlowId, + meta: &mut Meta, + ) -> rule::GenDescResult { + let mac_addr = meta.get::(); + let desc = Nat4Desc { + priv_ip: self.priv_ip, + public_ip: self.public_ip, + // XXX-EXT-IP This is assuming ext_ip_hack and will only + // allow for inbound connections, this will not work for + // outbound. If we want that we'll want to actually query + // the native router/ARP table. + src_mac: mac_addr.cloned(), + }; + Ok(AllowOrDeny::Allow(Arc::new(desc))) + } + + // XXX we should be able to set implicit predicates if we add an + // IpCidr field to describe which subnet the client is on; but for + // now just keep the predicates fully explicit. 
+ fn implicit_preds(&self) -> (Vec, Vec) { + (vec![], vec![]) + } +} + +#[derive(Clone)] +pub struct Nat4Desc { + priv_ip: Ipv4Addr, + public_ip: Ipv4Addr, + // XXX-EXT-IP + src_mac: Option, +} + +pub const NAT4_NAME: &'static str = "NAT4"; + +impl ActionDesc for Nat4Desc { + fn gen_ht(&self, dir: Direction) -> HT { + match dir { + Direction::Out => { + let mut ht = HT { + name: NAT4_NAME.to_string(), + inner_ip: Ipv4Meta::modify( + Some(self.public_ip), + None, + None, + ), + ..Default::default() + }; + + // XXX-EXT-IP hack to rewrite destination MAC address + // from virtual gateway addr to actual address that + // initiated connection. + if self.src_mac.is_some() { + ht.inner_ether = EtherMeta::modify(None, self.src_mac); + } + ht + } + + Direction::In => HT { + name: NAT4_NAME.to_string(), + inner_ip: Ipv4Meta::modify(None, Some(self.priv_ip), None), + ..Default::default() + }, + } + } + + fn name(&self) -> &str { + NAT4_NAME + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn nat4_rewrite() { + use crate::api::MacAddr; + use crate::engine::ether::{EtherMeta, ETHER_TYPE_IPV4}; + use crate::engine::headers::{IpMeta, UlpMeta}; + use crate::engine::ip4::Protocol; + use crate::engine::packet::{MetaGroup, PacketMeta}; + use crate::engine::tcp::TcpMeta; + + let priv_mac = MacAddr::from([0x02, 0x08, 0x20, 0xd8, 0x35, 0xcf]); + let dest_mac = MacAddr::from([0x78, 0x23, 0xae, 0x5d, 0x4f, 0x0d]); + let priv_ip = "10.0.0.220".parse().unwrap(); + let priv_port = "4999".parse().unwrap(); + let pub_ip = "52.10.128.69".parse().unwrap(); + let outside_ip = "76.76.21.21".parse().unwrap(); + let outside_port = 80; + let nat = Nat4::new(priv_ip, pub_ip); + let mut port_meta = Meta::new(); + + // ================================================================ + // Build the packet metadata + // ================================================================ + let ether = EtherMeta { + src: priv_mac, + dst: dest_mac, + ether_type: ETHER_TYPE_IPV4, + }; + let ip =
IpMeta::from(Ipv4Meta { + src: priv_ip, + dst: outside_ip, + proto: Protocol::TCP, + }); + let ulp = UlpMeta::from(TcpMeta { + src: priv_port, + dst: outside_port, + flags: 0, + seq: 0, + ack: 0, + }); + + let mut pmo = PacketMeta { + outer: Default::default(), + inner: MetaGroup { + ether: Some(ether), + ip: Some(ip), + ulp: Some(ulp), + ..Default::default() + }, + }; + + // ================================================================ + // Verify descriptor generation. + // ================================================================ + let flow_out = InnerFlowId::from(&pmo); + let desc = match nat.gen_desc(&flow_out, &mut port_meta) { + Ok(AllowOrDeny::Allow(desc)) => desc, + _ => panic!("expected AllowOrDeny::Allow(desc) result"), + }; + + // ================================================================ + // Verify outbound header transformation + // ================================================================ + let out_ht = desc.gen_ht(Direction::Out); + out_ht.run(&mut pmo); + + let ether_meta = pmo.inner.ether.as_ref().unwrap(); + assert_eq!(ether_meta.src, priv_mac); + assert_eq!(ether_meta.dst, dest_mac); + + let ip4_meta = match pmo.inner.ip.as_ref().unwrap() { + IpMeta::Ip4(v) => v, + _ => panic!("expect Ipv4Meta"), + }; + + assert_eq!(ip4_meta.src, pub_ip); + assert_eq!(ip4_meta.dst, outside_ip); + assert_eq!(ip4_meta.proto, Protocol::TCP); + + let tcp_meta = match pmo.inner.ulp.as_ref().unwrap() { + UlpMeta::Tcp(v) => v, + _ => panic!("expect TcpMeta"), + }; + + assert_eq!(tcp_meta.src, priv_port); + assert_eq!(tcp_meta.dst, outside_port); + assert_eq!(tcp_meta.flags, 0); + + // ================================================================ + // Verify inbound header transformation. 
+ // ================================================================ + let ether = EtherMeta { + src: dest_mac, + dst: priv_mac, + ether_type: ETHER_TYPE_IPV4, + }; + let ip = IpMeta::from(Ipv4Meta { + src: outside_ip, + dst: pub_ip, + proto: Protocol::TCP, + }); + let ulp = UlpMeta::from(TcpMeta { + src: outside_port, + dst: priv_port, + flags: 0, + seq: 0, + ack: 0, + }); + + let mut pmi = PacketMeta { + outer: Default::default(), + inner: MetaGroup { + ether: Some(ether), + ip: Some(ip), + ulp: Some(ulp), + ..Default::default() + }, + }; + + let in_ht = desc.gen_ht(Direction::In); + in_ht.run(&mut pmi); + + let ether_meta = pmi.inner.ether.as_ref().unwrap(); + assert_eq!(ether_meta.src, dest_mac); + assert_eq!(ether_meta.dst, priv_mac); + + let ip4_meta = match pmi.inner.ip.as_ref().unwrap() { + IpMeta::Ip4(v) => v, + _ => panic!("expect Ipv4Meta"), + }; + + assert_eq!(ip4_meta.src, outside_ip); + assert_eq!(ip4_meta.dst, priv_ip); + assert_eq!(ip4_meta.proto, Protocol::TCP); + + let tcp_meta = match pmi.inner.ulp.as_ref().unwrap() { + UlpMeta::Tcp(v) => v, + _ => panic!("expect TcpMeta"), + }; + + assert_eq!(tcp_meta.src, outside_port); + assert_eq!(tcp_meta.dst, priv_port); + assert_eq!(tcp_meta.flags, 0); + } +} diff --git a/opte/src/oxide_vpc/engine/arp.rs b/opte/src/oxide_vpc/engine/arp.rs index 4653db3f..193435af 100644 --- a/opte/src/oxide_vpc/engine/arp.rs +++ b/opte/src/oxide_vpc/engine/arp.rs @@ -27,13 +27,35 @@ pub fn setup( cfg: &PortCfg, ft_limit: core::num::NonZeroU32, ) -> core::result::Result<(), OpteError> { + let mut actions = vec![ + // ARP Reply for gateway's IP. + Action::Hairpin(Arc::new(ArpReply::new(cfg.gw_ip, cfg.gw_mac))), + ]; + + if let Some(snat) = &cfg.snat { + if cfg.proxy_arp_enable { + // XXX-EXT-IP Hack to get remote access to guest instance + // via SNAT (which is not what it's intended for, but I + // think it'll work). + // + // Reuse the same MAC address for both IPs. 
This should be + // fine as the VIP is contained solely to the guest + // instance. + actions.push(Action::Hairpin(Arc::new(ArpReply::new( + snat.public_ip, + cfg.private_mac, + )))); + } + } + let arp = Layer::new( "arp", pb.name(), - vec![ - // ARP Reply for gateway's IP. - Action::Hairpin(Arc::new(ArpReply::new(cfg.gw_ip, cfg.gw_mac))), - ], + // vec![ + // // ARP Reply for gateway's IP. + // Action::Hairpin(Arc::new(ArpReply::new(cfg.gw_ip, cfg.gw_mac))), + // ], + actions, ft_limit, ); @@ -55,6 +77,19 @@ pub fn setup( )])); arp.add_rule(Direction::Out, rule.finalize()); + // ================================================================ + // Proxy ARP for any incoming requests for guest's SNAT IP + // + // XXX-EXT-IP This is a hack to get guest access working until we + // have boundary services integrated. + // ================================================================ + if let Some(_) = &cfg.snat { + if cfg.proxy_arp_enable { + let rule = Rule::new(1, arp.action(1).unwrap().clone()); + arp.add_rule(Direction::In, rule.finalize()); + } + } + // ================================================================ // Drop all inbound ARP Requests // ================================================================ diff --git a/opte/src/oxide_vpc/engine/mod.rs b/opte/src/oxide_vpc/engine/mod.rs index b184b890..991a6904 100644 --- a/opte/src/oxide_vpc/engine/mod.rs +++ b/opte/src/oxide_vpc/engine/mod.rs @@ -8,6 +8,7 @@ pub mod arp; pub mod dhcp4; pub mod firewall; pub mod icmp; +pub mod nat4; pub mod overlay; pub mod router; pub mod snat4; diff --git a/opte/src/oxide_vpc/engine/nat4.rs b/opte/src/oxide_vpc/engine/nat4.rs new file mode 100644 index 00000000..0a816d1d --- /dev/null +++ b/opte/src/oxide_vpc/engine/nat4.rs @@ -0,0 +1,61 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Copyright 2022 Oxide Computer Company + +cfg_if! { + if #[cfg(all(not(feature = "std"), not(test)))] { + use alloc::boxed::Box; + use alloc::sync::Arc; + } else { + use std::boxed::Box; + use std::sync::Arc; + } +} + +use super::router::{RouterTargetInternal, ROUTER_LAYER_NAME}; +use crate::api::{Direction, OpteError}; +use crate::engine::ether::ETHER_TYPE_IPV4; +use crate::engine::layer::Layer; +use crate::engine::nat::Nat4; +use crate::engine::port::{PortBuilder, Pos}; +use crate::engine::rule::{ + Action, EtherTypeMatch, Ipv4AddrMatch, Predicate, Rule, +}; +use crate::oxide_vpc::PortCfg; + +pub const NAT4_LAYER_NAME: &'static str = "nat4"; + +pub fn setup( + pb: &mut PortBuilder, + cfg: &PortCfg, + ft_limit: core::num::NonZeroU32, +) -> core::result::Result<(), OpteError> { + // XXX-EXT-IP This config should not come from SNAT. This is + // currently a hack assuming its use is in service of the + // ext_ip_hack flag. + let nat = Nat4::new(cfg.private_ip, cfg.snat.as_ref().unwrap().public_ip); + let layer = Layer::new( + NAT4_LAYER_NAME, + pb.name(), + vec![Action::Stateful(Arc::new(nat))], + ft_limit, + ); + let mut rule = Rule::new(1, layer.action(0).unwrap().clone()); + rule.add_predicate(Predicate::InnerEtherType(vec![EtherTypeMatch::Exact( + ETHER_TYPE_IPV4, + )])); + rule.add_predicate(Predicate::Meta(Box::new( + RouterTargetInternal::InternetGateway, + ))); + layer.add_rule(Direction::Out, rule.finalize()); + + let mut rule = Rule::new(1, layer.action(0).unwrap().clone()); + rule.add_predicate(Predicate::InnerDstIp4(vec![Ipv4AddrMatch::Exact( + cfg.snat.as_ref().unwrap().public_ip, + )])); + layer.add_rule(Direction::In, rule.finalize()); + + pb.add_layer(layer, Pos::After(ROUTER_LAYER_NAME)) +} diff --git a/opte/src/oxide_vpc/engine/snat4.rs b/opte/src/oxide_vpc/engine/snat4.rs index 3f83bf77..c7472155 100644 --- a/opte/src/oxide_vpc/engine/snat4.rs +++ b/opte/src/oxide_vpc/engine/snat4.rs @@ -54,5 +54,6 @@ pub fn setup(
RouterTargetInternal::InternetGateway, ))); layer.add_rule(Direction::Out, rule.finalize()); + pb.add_layer(layer, Pos::After(ROUTER_LAYER_NAME)) } diff --git a/opte/src/oxide_vpc/mod.rs b/opte/src/oxide_vpc/mod.rs index d9d9fc21..ce3c51dc 100644 --- a/opte/src/oxide_vpc/mod.rs +++ b/opte/src/oxide_vpc/mod.rs @@ -46,6 +46,8 @@ cfg_if! { pub vni: Vni, pub phys_ip: Ipv6Addr, pub bsvc_addr: PhysNet, + // XXX-EXT-IP + pub proxy_arp_enable: bool, } } } diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 042eda8d..85d1883d 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -33,13 +33,15 @@ use opte::api::{ CmdOk, Direction, Ipv4Cidr, MacAddr, NoResp, OpteCmd, OpteCmdIoctl, OpteError, SetXdeUnderlayReq, }; -use opte::engine::ether::EtherAddr; +use opte::engine::ether::{EtherAddr, EtherType}; use opte::engine::geneve::Vni; use opte::engine::headers::{IpAddr, IpCidr}; use opte::engine::ioctl::{self as api}; use opte::engine::ip4::Ipv4Addr; use opte::engine::ip6::Ipv6Addr; -use opte::engine::packet::{Initialized, Packet, ParseError, Parsed}; +use opte::engine::packet::{ + Initialized, Packet, PacketRead, PacketReader, ParseError, Parsed, +}; use opte::engine::port::meta; use opte::engine::port::resources::Resources; use opte::engine::port::{Port, PortBuilder, ProcessResult}; @@ -52,7 +54,7 @@ use opte::oxide_vpc::api::{ SetFwRulesReq, SetVirt2PhysReq, }; use opte::oxide_vpc::engine::{ - arp, dhcp4, firewall, icmp, overlay, router, snat4, + arp, dhcp4, firewall, icmp, nat4, overlay, router, snat4, }; use opte::oxide_vpc::PortCfg; use opte::{CStr, CString, ExecCtx}; @@ -64,6 +66,7 @@ use opte::{CStr, CString, ExecCtx}; // // Unwrap: We know all of these are safe to unwrap(). 
const FW_FT_LIMIT: Option = NonZeroU32::new(8096); +const NAT_FT_LIMIT: Option = NonZeroU32::new(8096); const SNAT_FT_LIMIT: Option = NonZeroU32::new(8096); const FT_LIMIT_ONE: Option = NonZeroU32::new(1); const UFT_LIMIT: Option = NonZeroU32::new(8096); @@ -78,6 +81,9 @@ static mut xde_devs: KRwLock>> = KRwLock::new(Vec::new()); /// DDI dev info pointer to the attached xde device. static mut xde_dip: *mut dev_info = 0 as *mut dev_info; +#[no_mangle] +pub static mut xde_ext_ip_hack: i32 = 0; + // This block is purely for SDT probes. extern "C" { pub fn __dtrace_probe_bad__packet( @@ -646,6 +652,29 @@ unsafe extern "C" fn xde_attach( } xde_dip = dip; + + if !driver_prop_exists("ext_ip_hack") { + warn!("failed to find 'ext_ip_hack' property in xde.conf"); + return DDI_FAILURE; + } + + match get_driver_prop_bool("ext_ip_hack") { + Some(true) => { + warn!("ext_ip_hack enabled: traffic will NOT be encapsulated"); + xde_ext_ip_hack = 1; + } + + Some(_) => { + warn!("ext_ip_hack disabled: traffic will be encapsulated"); + xde_ext_ip_hack = 0; + } + + None => { + warn!("failed to read 'ext_ip_hack' from xde.conf, disabled"); + xde_ext_ip_hack = 0; + } + }; + let state = Box::new(XdeState::new()); ddi_set_driver_private(xde_dip, Box::into_raw(state) as *mut c_void); opte::engine::dbg(format!("dld_ioc_add: {:#?}", xde_ioc_list)); @@ -756,7 +785,7 @@ unsafe fn init_underlay_ingress_handlers( }; // set up promisc rx handlers for underlay devices - let u1_mph = match u2_mch.add_promisc( + let u1_mph = match u1_mch.add_promisc( mac::mac_client_promisc_type_t::MAC_CLIENT_PROMISC_ALL, xde_rx, ptr::null_mut(), @@ -812,6 +841,60 @@ unsafe fn init_underlay_ingress_handlers( }) } +#[no_mangle] +unsafe fn driver_prop_exists(pname: &str) -> bool { + let name = match CString::new(pname) { + Ok(s) => s, + Err(e) => { + warn!("bad driver prop string name: {}: {:?}", pname, e); + return false; + } + }; + + let ret = ddi_prop_exists( + DDI_DEV_T_ANY, + xde_dip, + DDI_PROP_DONTPASS, + 
name.as_ptr() as *const c_char, + ); + + ret == 1 +} + +#[no_mangle] +unsafe fn get_driver_prop_bool(pname: &str) -> Option { + let name = match CString::new(pname) { + Ok(s) => s, + Err(e) => { + warn!("bad driver prop string name: {}: {:?}", pname, e); + return None; + } + }; + + let ret = ddi_prop_get_int( + DDI_DEV_T_ANY, + xde_dip, + DDI_PROP_DONTPASS, + name.as_ptr() as *const c_char, + 99, + ); + + // Technically, the system could also return DDI_PROP_NOT_FOUND, + // which indicates the property cannot be decoded as an int. + // However, DDI_PROP_NOT_FOUND has a value of 1, which is totally + // broken given that 1 is a perfectly reasonable value for someone + // to want to use for their property. This means that from the + // perspective of the driver there is no way to differentiate + // between a true value of 1 and the case where the user entered + // gibberish. In this case we treat gibberish as true. + if ret == 99 { + warn!("driver prop {} not found", pname); + return None; + } + + Some(ret == 1) +} + #[no_mangle] unsafe fn get_driver_prop_string(pname: &str) -> Option { let name = match CString::new(pname) { @@ -1214,6 +1297,15 @@ unsafe extern "C" fn xde_mc_tx( let res = port.process(Direction::Out, &mut pkt, &mut meta, &rsrcs); match res { Ok(ProcessResult::Modified) => { + if xde_ext_ip_hack == 1 { + opte::engine::dbg(format!("[Tx] ext_ip_hack, bypass encap")); + // TODO need to special-case guest-loopback here as + // well if we want intra-guest comms to work when the + // ext_ip_hack is enabled. + mch.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); + return ptr::null_mut(); + } + // If the outer IPv6 destination is the same as the // source, then we need to loop the packet inbound to the // guest on this same host. 
@@ -1678,6 +1770,7 @@ fn new_port( ip: bsvc_ip, vni: bsvc_vni, }, + proxy_arp_enable: unsafe { xde_ext_ip_hack == 1 }, }; let mut pb = PortBuilder::new(&name, name_cstr, private_mac.into(), ectx); @@ -1689,9 +1782,23 @@ fn new_port( arp::setup(&mut pb, &port_cfg, FT_LIMIT_ONE.unwrap())?; router::setup(&mut pb, &port_cfg, FT_LIMIT_ONE.unwrap())?; if snat.is_some() { - snat4::setup(&mut pb, &port_cfg, SNAT_FT_LIMIT.unwrap())?; + // XXX This is a hack to allow incoming connections to the + // guest. In this case we hijack SNAT configuration and treat + // it as a public IP; performing 1:1 NAT. + if unsafe { xde_ext_ip_hack == 1 } { + nat4::setup(&mut pb, &port_cfg, NAT_FT_LIMIT.unwrap())?; + } else { + snat4::setup(&mut pb, &port_cfg, SNAT_FT_LIMIT.unwrap())?; + } } - overlay::setup(&pb, &port_cfg, FT_LIMIT_ONE.unwrap())?; + + if unsafe { xde_ext_ip_hack != 1 } { + warn!("enabling overlay for port: {}", name); + overlay::setup(&pb, &port_cfg, FT_LIMIT_ONE.unwrap())?; + } else { + warn!("disabling overlay for port: {}", name); + } + let port = Arc::new(pb.create(UFT_LIMIT.unwrap(), TCP_STATE_LIMIT.unwrap())); Ok((port, port_cfg)) @@ -1730,46 +1837,123 @@ unsafe extern "C" fn xde_rx( }; let hdrs = pkt.headers(); + //TODO create a fast lookup table + let devs = xde_devs.read(); - // determine where to send packet based on geneve vni - let outer = match hdrs.outer { - Some(ref outer) => outer, - None => { - // TODO add stat - let msg = "Rx bad packet: no outer header"; - bad_packet_probe(None, Direction::In, mp_chain, msg); - opte::engine::dbg(msg); - return; - } - }; + let dev = if xde_ext_ip_hack == 0 { + // determine where to send packet based on geneve vni + let outer = match hdrs.outer { + Some(ref outer) => outer, + None => { + // TODO add stat + let msg = "Rx bad packet: no outer header"; + bad_packet_probe(None, Direction::In, mp_chain, msg); + opte::engine::dbg(msg); + return; + } + }; - let geneve = match outer.encap { - Some(ref geneve) => geneve, - None => { - 
// TODO add SDT probe - // TODO add stat - opte::engine::dbg(format!("no geneve header, dropping")); - return; - } - }; + let geneve = match outer.encap { + Some(ref geneve) => geneve, + None => { + // TODO add SDT probe + // TODO add stat + opte::engine::dbg(format!("no geneve header, dropping")); + return; + } + }; + + let vni = geneve.vni; + let ether_dst = hdrs.inner.ether.dst(); + let dev = match devs + .iter() + .find(|x| x.vni == vni && x.port.mac_addr() == ether_dst) + { + Some(dev) => dev, + None => { + // TODO add SDT probe + // TODO add stat + opte::engine::dbg(format!( + "[encap] no device found for vni: {} mac: {}", + vni, ether_dst + )); + return; + } + }; + dev + } else { + let et = hdrs.inner.ether.ether_type(); + + // Learn the MAC address. + // if et == EtherType::Ipv4 { + // let ether_src = hdrs.inner.ether.src(); + // let ip4_src = hdrs.inner.ip.as_ref().unwrap().ip4().unwrap().src(); + // let state = get_xde_state(); + // state.arp.write().insert(ip4_src, ether_src); + // } + + let ether_dst = hdrs.inner.ether.dst(); + if ether_dst == EtherAddr::from(MacAddr::BROADCAST) { + let rdr = PacketReader::new(&pkt, ()); + let bytes = rdr.copy_remaining(); + drop(rdr); + + for dev in devs.iter() { + // just go straight to overlay in passthrough mode + if (*dev).passthrough { + mac::mac_rx((*dev).mh, mrh, mp_chain); + } + + let port = &(*dev).port; + let mut meta = meta::Meta::new(); + let mut rsrcs = Resources::new(); + let _ = rsrcs.add(dev.port_v2p.clone()); + if et == EtherType::Ipv4 { + let ether_src = hdrs.inner.ether.src(); + let _ = meta.replace(MacAddr::from(ether_src)); + } + + let mut pkt_copy = Packet::copy(&bytes).parse().unwrap(); + let res = port.process( + Direction::In, + &mut pkt_copy, + &mut meta, + &rsrcs, + ); + + match res { + Ok(ProcessResult::Modified) => { + mac::mac_rx((*dev).mh, mrh, pkt_copy.unwrap()); + } + Ok(ProcessResult::Hairpin(hppkt)) => { + // TODO assuming underlay device 1 + (*dev).u1.mch.tx_drop_on_no_desc( + hppkt, 
+ 0, + MacTxFlags::empty(), + ); + } + Ok(ProcessResult::Bypass) => { + mac::mac_rx((*dev).mh, mrh, mp_chain); + } + _ => {} + } + } - //TODO create a fast lookup table - let devs = xde_devs.read(); - let vni = geneve.vni; - let ether_dst = hdrs.inner.ether.dst(); - let dev = match devs - .iter() - .find(|x| x.vni == vni && x.port.mac_addr() == ether_dst) - { - Some(dev) => dev, - None => { - // TODO add SDT probe - // TODO add stat - opte::engine::dbg(format!( - "no device found for vni: {} mac: {}", - vni, ether_dst - )); return; + } else { + match devs.iter().find(|x| x.port.mac_addr() == ether_dst) { + Some(dev) => dev, + None => { + // TODO add SDT probe + // TODO add stat + opte::engine::dbg(format!( + "[ext_ip_hack] no device found for mac: {}", + ether_dst + )); + return; + } + } } }; @@ -1782,6 +1966,15 @@ unsafe extern "C" fn xde_rx( let mut meta = meta::Meta::new(); let mut rsrcs = Resources::new(); let _ = rsrcs.add(dev.port_v2p.clone()); + + let et = hdrs.inner.ether.ether_type(); + if et == EtherType::Ipv4 { + // XXX-EXT-IP This is a hack to allow NAT action to have + // access to the source MAC address. + let ether_src = hdrs.inner.ether.src(); + let _ = meta.replace(MacAddr::from(ether_src)); + } + let res = port.process(Direction::In, &mut pkt, &mut meta, &rsrcs); match res { Ok(ProcessResult::Modified) => { diff --git a/xde/xde.conf b/xde/xde.conf index c168650b..2ad87abc 100644 --- a/xde/xde.conf +++ b/xde/xde.conf @@ -1,3 +1,11 @@ # xde kernel module configuration file name="xde" parent="pseudo" instance=0; + +# +# Enable the "external IP hack". This disables encap and turns SNAT +# into plain NAT, along with performing proxy ARP for the NAT IP. This +# allows one to have network connectivity to their guests via the local +# IPv4 network that the sled is sitting on. +# +ext_ip_hack = 0; \ No newline at end of file