diff --git a/internal/quic/conn.go b/internal/quic/conn.go index d6dbac1a93..cdf79d607c 100644 --- a/internal/quic/conn.go +++ b/internal/quic/conn.go @@ -9,6 +9,7 @@ package quic import ( "errors" "fmt" + "net/netip" "time" ) @@ -16,16 +17,37 @@ import ( // // Multiple goroutines may invoke methods on a Conn simultaneously. type Conn struct { + side connSide + listener connListener + testHooks connTestHooks + peerAddr netip.AddrPort + msgc chan any donec chan struct{} // closed when conn loop exits exited bool // set to make the conn loop exit immediately - testHooks connTestHooks + w packetWriter + acks [numberSpaceCount]ackState // indexed by number space + connIDState connIDState + tlsState tlsState + loss lossState // idleTimeout is the time at which the connection will be closed due to inactivity. // https://www.rfc-editor.org/rfc/rfc9000#section-10.1 maxIdleTimeout time.Duration idleTimeout time.Time + + peerAckDelayExponent int8 // -1 when unknown + + // Tests only: Send a PING in a specific number space. + testSendPingSpace numberSpace + testSendPing sentVal +} + +// The connListener is the Conn's Listener. +// Defined as an interface so we can swap it out in tests. +type connListener interface { + sendDatagram(p []byte, addr netip.AddrPort) error } // connTestHooks override conn behavior in tests. @@ -33,18 +55,41 @@ type connTestHooks interface { nextMessage(msgc chan any, nextTimeout time.Time) (now time.Time, message any) } -func newConn(now time.Time, hooks connTestHooks) (*Conn, error) { +func newConn(now time.Time, side connSide, initialConnID []byte, peerAddr netip.AddrPort, l connListener, hooks connTestHooks) (*Conn, error) { c := &Conn{ - donec: make(chan struct{}), - testHooks: hooks, - maxIdleTimeout: defaultMaxIdleTimeout, - idleTimeout: now.Add(defaultMaxIdleTimeout), + side: side, + listener: l, + peerAddr: peerAddr, + msgc: make(chan any, 1), + donec: make(chan struct{}), + testHooks: hooks, + maxIdleTimeout: defaultMaxIdleTimeout, + idleTimeout: now.Add(defaultMaxIdleTimeout), + peerAckDelayExponent: -1, } // A one-element buffer allows us to wake a Conn's event loop as a // non-blocking operation. c.msgc = make(chan any, 1) + if c.side == clientSide { + if err := c.connIDState.initClient(newRandomConnID); err != nil { + return nil, err + } + initialConnID = c.connIDState.dstConnID() + } else { + if err := c.connIDState.initServer(newRandomConnID, initialConnID); err != nil { + return nil, err + } + } + + // The smallest allowed maximum QUIC datagram size is 1200 bytes. + // TODO: PMTU discovery. + const maxDatagramSize = 1200 + c.loss.init(c.side, maxDatagramSize, now) + + c.tlsState.init(c.side, initialConnID) + go c.loop(now) return c, nil } @@ -76,7 +121,14 @@ func (c *Conn) loop(now time.Time) { } for !c.exited { - nextTimeout := c.idleTimeout + sendTimeout := c.maybeSend(now) // try sending + + // Note that we only need to consider the ack timer for the App Data space, + // since the Initial and Handshake spaces always ack immediately. + nextTimeout := sendTimeout + nextTimeout = firstTime(nextTimeout, c.idleTimeout) + nextTimeout = firstTime(nextTimeout, c.loss.timer) + nextTimeout = firstTime(nextTimeout, c.acks[appDataSpace].nextAck) var m any if hooks != nil { @@ -100,6 +152,9 @@ func (c *Conn) loop(now time.Time) { now = time.Now() } switch m := m.(type) { + case *datagram: + c.handleDatagram(now, m) + m.recycle() case timerEvent: // A connection timer has expired. if !now.Before(c.idleTimeout) { @@ -109,6 +164,7 @@ func (c *Conn) loop(now time.Time) { c.exited = true return } + c.loss.advance(now, c.handleAckOrLoss) case func(time.Time, *Conn): // Send a func to msgc to run it on the main Conn goroutine m(now, c) @@ -146,6 +202,12 @@ func (c *Conn) runOnLoop(f func(now time.Time, c *Conn)) error { return nil } +// abort terminates a connection with an error. +func (c *Conn) abort(now time.Time, err error) { + // TODO: Send CONNECTION_CLOSE frames. + c.exit() +} + // exit fully terminates a connection immediately. func (c *Conn) exit() { c.runOnLoop(func(now time.Time, c *Conn) { @@ -153,3 +215,17 @@ func (c *Conn) exit() { }) <-c.donec } + +// firstTime returns the earliest non-zero time, or zero if both times are zero. +func firstTime(a, b time.Time) time.Time { + switch { + case a.IsZero(): + return b + case b.IsZero(): + return a + case a.Before(b): + return a + default: + return b + } +} diff --git a/internal/quic/conn_loss.go b/internal/quic/conn_loss.go new file mode 100644 index 0000000000..11ed42dbb9 --- /dev/null +++ b/internal/quic/conn_loss.go @@ -0,0 +1,46 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +import "fmt" + +// handleAckOrLoss deals with the final fate of a packet we sent: +// Either the peer acknowledges it, or we declare it lost. +// +// In order to handle packet loss, we must retain any information sent to the peer +// until the peer has acknowledged it. +// +// When information is acknowledged, we can discard it. +// +// When information is lost, we mark it for retransmission. +// See RFC 9000, Section 13.3 for a complete list of information which is retransmitted on loss. +// https://www.rfc-editor.org/rfc/rfc9000#section-13.3 +func (c *Conn) handleAckOrLoss(space numberSpace, sent *sentPacket, fate packetFate) { + // The list of frames in a sent packet is marshaled into a buffer in the sentPacket + // by the packetWriter. Unmarshal that buffer here. This code must be kept in sync with + // packetWriter.append*. + // + // A sent packet meets its fate (acked or lost) only once, so it's okay to consume + // the sentPacket's buffer here. + for !sent.done() { + switch f := sent.next(); f { + default: + panic(fmt.Sprintf("BUG: unhandled lost frame type %x", f)) + case frameTypeAck: + // Unlike most information, loss of an ACK frame does not trigger + // retransmission. ACKs are sent in response to ack-eliciting packets, + // and always contain the latest information available. + // + // Acknowledgement of an ACK frame may allow us to discard information + // about older packets. + largest := packetNumber(sent.nextInt()) + if fate == packetAcked { + c.acks[space].handleAck(largest) + } + } + } +} diff --git a/internal/quic/conn_recv.go b/internal/quic/conn_recv.go new file mode 100644 index 0000000000..d5a3b8cb0c --- /dev/null +++ b/internal/quic/conn_recv.go @@ -0,0 +1,264 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +import ( + "time" +) + +func (c *Conn) handleDatagram(now time.Time, dgram *datagram) { + buf := dgram.b + c.loss.datagramReceived(now, len(buf)) + for len(buf) > 0 { + var n int + ptype := getPacketType(buf) + switch ptype { + case packetTypeInitial: + if c.side == serverSide && len(dgram.b) < minimumClientInitialDatagramSize { + // Discard client-sent Initial packets in too-short datagrams. + // https://www.rfc-editor.org/rfc/rfc9000#section-14.1-4 + return + } + n = c.handleLongHeader(now, ptype, initialSpace, buf) + case packetTypeHandshake: + n = c.handleLongHeader(now, ptype, handshakeSpace, buf) + case packetType1RTT: + n = c.handle1RTT(now, buf) + default: + return + } + if n <= 0 { + // Invalid data at the end of a datagram is ignored. + break + } + c.idleTimeout = now.Add(c.maxIdleTimeout) + buf = buf[n:] + } +} + +func (c *Conn) handleLongHeader(now time.Time, ptype packetType, space numberSpace, buf []byte) int { + if !c.tlsState.rkeys[space].isSet() { + return skipLongHeaderPacket(buf) + } + + pnumMax := c.acks[space].largestSeen() + p, n := parseLongHeaderPacket(buf, c.tlsState.rkeys[space], pnumMax) + if n < 0 { + return -1 + } + if p.reservedBits != 0 { + // https://www.rfc-editor.org/rfc/rfc9000#section-17.2-8.2.1 + c.abort(now, localTransportError(errProtocolViolation)) + return -1 + } + + if !c.acks[space].shouldProcess(p.num) { + return n + } + + c.connIDState.handlePacket(c.side, p.ptype, p.srcConnID) + ackEliciting := c.handleFrames(now, ptype, space, p.payload) + c.acks[space].receive(now, space, p.num, ackEliciting) + if p.ptype == packetTypeHandshake && c.side == serverSide { + c.loss.validateClientAddress() + + // TODO: Discard Initial keys. + // https://www.rfc-editor.org/rfc/rfc9001#section-4.9.1-2 + } + return n +} + +func (c *Conn) handle1RTT(now time.Time, buf []byte) int { + if !c.tlsState.rkeys[appDataSpace].isSet() { + // 1-RTT packets extend to the end of the datagram, + // so skip the remainder of the datagram if we can't parse this. + return len(buf) + } + + pnumMax := c.acks[appDataSpace].largestSeen() + p, n := parse1RTTPacket(buf, c.tlsState.rkeys[appDataSpace], connIDLen, pnumMax) + if n < 0 { + return -1 + } + if p.reservedBits != 0 { + // https://www.rfc-editor.org/rfc/rfc9000#section-17.3.1-4.8.1 + c.abort(now, localTransportError(errProtocolViolation)) + return -1 + } + + if !c.acks[appDataSpace].shouldProcess(p.num) { + return len(buf) + } + + ackEliciting := c.handleFrames(now, packetType1RTT, appDataSpace, p.payload) + c.acks[appDataSpace].receive(now, appDataSpace, p.num, ackEliciting) + return len(buf) +} + +func (c *Conn) handleFrames(now time.Time, ptype packetType, space numberSpace, payload []byte) (ackEliciting bool) { + if len(payload) == 0 { + // "An endpoint MUST treat receipt of a packet containing no frames + // as a connection error of type PROTOCOL_VIOLATION." + // https://www.rfc-editor.org/rfc/rfc9000#section-12.4-3 + c.abort(now, localTransportError(errProtocolViolation)) + return false + } + // frameOK verifies that ptype is one of the packets in mask. + frameOK := func(c *Conn, ptype, mask packetType) (ok bool) { + if ptype&mask == 0 { + // "An endpoint MUST treat receipt of a frame in a packet type + // that is not permitted as a connection error of type + // PROTOCOL_VIOLATION." + // https://www.rfc-editor.org/rfc/rfc9000#section-12.4-3 + c.abort(now, localTransportError(errProtocolViolation)) + return false + } + return true + } + // Packet masks from RFC 9000 Table 3. + // https://www.rfc-editor.org/rfc/rfc9000#table-3 + const ( + IH_1 = packetTypeInitial | packetTypeHandshake | packetType1RTT + __01 = packetType0RTT | packetType1RTT + ___1 = packetType1RTT + ) + for len(payload) > 0 { + switch payload[0] { + case frameTypePadding, frameTypeAck, frameTypeAckECN, + frameTypeConnectionCloseTransport, frameTypeConnectionCloseApplication: + default: + ackEliciting = true + } + n := -1 + switch payload[0] { + case frameTypePadding: + // PADDING is OK in all spaces. + n = 1 + case frameTypePing: + // PING is OK in all spaces. + // + // A PING frame causes us to respond with an ACK by virtue of being + // an ack-eliciting frame, but requires no other action. + n = 1 + case frameTypeAck, frameTypeAckECN: + if !frameOK(c, ptype, IH_1) { + return + } + n = c.handleAckFrame(now, space, payload) + case frameTypeResetStream: + if !frameOK(c, ptype, __01) { + return + } + _, _, _, n = consumeResetStreamFrame(payload) + case frameTypeStopSending: + if !frameOK(c, ptype, __01) { + return + } + _, _, n = consumeStopSendingFrame(payload) + case frameTypeCrypto: + if !frameOK(c, ptype, IH_1) { + return + } + _, _, n = consumeCryptoFrame(payload) + case frameTypeNewToken: + if !frameOK(c, ptype, ___1) { + return + } + _, n = consumeNewTokenFrame(payload) + case 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f: // STREAM + if !frameOK(c, ptype, __01) { + return + } + _, _, _, _, n = consumeStreamFrame(payload) + case frameTypeMaxData: + if !frameOK(c, ptype, __01) { + return + } + _, n = consumeMaxDataFrame(payload) + case frameTypeMaxStreamData: + if !frameOK(c, ptype, __01) { + return + } + _, _, n = consumeMaxStreamDataFrame(payload) + case frameTypeMaxStreamsBidi, frameTypeMaxStreamsUni: + if !frameOK(c, ptype, __01) { + return + } + _, _, n = consumeMaxStreamsFrame(payload) + case frameTypeStreamsBlockedBidi, frameTypeStreamsBlockedUni: + if !frameOK(c, ptype, __01) { + return + } + _, _, n = consumeStreamsBlockedFrame(payload) + case frameTypeStreamDataBlocked: + if !frameOK(c, ptype, __01) { + return + } + _, _, n = consumeStreamDataBlockedFrame(payload) + case frameTypeNewConnectionID: + if !frameOK(c, ptype, __01) { + return + } + _, _, _, _, n = consumeNewConnectionIDFrame(payload) + case frameTypeConnectionCloseTransport: + // CONNECTION_CLOSE is OK in all spaces. + _, _, _, n = consumeConnectionCloseTransportFrame(payload) + case frameTypeConnectionCloseApplication: + // CONNECTION_CLOSE is OK in all spaces. + _, _, n = consumeConnectionCloseApplicationFrame(payload) + case frameTypeHandshakeDone: + if !frameOK(c, ptype, ___1) { + return + } + n = 1 + } + if n < 0 { + c.abort(now, localTransportError(errFrameEncoding)) + return false + } + payload = payload[n:] + } + return ackEliciting +} + +func (c *Conn) handleAckFrame(now time.Time, space numberSpace, payload []byte) int { + c.loss.receiveAckStart() + _, ackDelay, n := consumeAckFrame(payload, func(rangeIndex int, start, end packetNumber) { + if end > c.loss.nextNumber(space) { + // Acknowledgement of a packet we never sent. + c.abort(now, localTransportError(errProtocolViolation)) + return + } + c.loss.receiveAckRange(now, space, rangeIndex, start, end, c.handleAckOrLoss) + }) + // Prior to receiving the peer's transport parameters, we cannot + // interpret the ACK Delay field because we don't know the ack_delay_exponent + // to apply. + // + // For servers, we should always know the ack_delay_exponent because the + // client's transport parameters are carried in its Initial packets and we + // won't send an ack-eliciting Initial packet until after receiving the last + // client Initial packet. + // + // For clients, we won't receive the server's transport parameters until handling + // its Handshake flight, which will probably happen after reading its ACK for our + // Initial packet(s). However, the peer's acknowledgement delay cannot reduce our + // adjusted RTT sample below min_rtt, and min_rtt is generally going to be set + // by the packet containing the ACK for our Initial flight. Therefore, the + // ACK Delay for an ACK in the Initial space is likely to be ignored anyway. + // + // Long story short, setting the delay to 0 prior to reading transport parameters + // is usually going to have no effect, will have only a minor effect in the rare + // cases when it happens, and there aren't any good alternatives anyway since we + // can't interpret the ACK Delay field without knowing the exponent. + var delay time.Duration + if c.peerAckDelayExponent >= 0 { + delay = ackDelay.Duration(uint8(c.peerAckDelayExponent)) + } + c.loss.receiveAckEnd(now, space, delay, c.handleAckOrLoss) + return n +} diff --git a/internal/quic/conn_send.go b/internal/quic/conn_send.go new file mode 100644 index 0000000000..3a51ceb285 --- /dev/null +++ b/internal/quic/conn_send.go @@ -0,0 +1,255 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +import ( + "time" +) + +// maybeSend sends datagrams, if possible. +// +// If sending is blocked by pacing, it returns the next time +// a datagram may be sent. +func (c *Conn) maybeSend(now time.Time) (next time.Time) { + // Assumption: The congestion window is not underutilized. + // If congestion control, pacing, and anti-amplification all permit sending, + // but we have no packet to send, then we will declare the window underutilized. + c.loss.cc.setUnderutilized(false) + + // Send one datagram on each iteration of this loop, + // until we hit a limit or run out of data to send. + // + // For each number space where we have write keys, + // attempt to construct a packet in that space. + // If the packet contains no frames (we have no data in need of sending), + // abandon the packet. + // + // Speculatively constructing packets means we don't need + // separate code paths for "do we have data to send?" and + // "send the data" that need to be kept in sync. + for { + limit, next := c.loss.sendLimit(now) + if limit == ccBlocked { + // If anti-amplification blocks sending, then no packet can be sent. + return next + } + // We may still send ACKs, even if congestion control or pacing limit sending. + + // Prepare to write a datagram of at most maxSendSize bytes. + c.w.reset(c.loss.maxSendSize()) + + // Initial packet. + pad := false + var sentInitial *sentPacket + if k := c.tlsState.wkeys[initialSpace]; k.isSet() { + pnumMaxAcked := c.acks[initialSpace].largestSeen() + pnum := c.loss.nextNumber(initialSpace) + p := longPacket{ + ptype: packetTypeInitial, + version: 1, + num: pnum, + dstConnID: c.connIDState.dstConnID(), + srcConnID: c.connIDState.srcConnID(), + } + c.w.startProtectedLongHeaderPacket(pnumMaxAcked, p) + c.appendFrames(now, initialSpace, pnum, limit) + sentInitial = c.w.finishProtectedLongHeaderPacket(pnumMaxAcked, k, p) + if sentInitial != nil { + // Client initial packets need to be sent in a datagram padded to + // at least 1200 bytes. We can't add the padding yet, however, + // since we may want to coalesce additional packets with this one. + if c.side == clientSide || sentInitial.ackEliciting { + pad = true + } + } + } + + // Handshake packet. + if k := c.tlsState.wkeys[handshakeSpace]; k.isSet() { + pnumMaxAcked := c.acks[handshakeSpace].largestSeen() + pnum := c.loss.nextNumber(handshakeSpace) + p := longPacket{ + ptype: packetTypeHandshake, + version: 1, + num: pnum, + dstConnID: c.connIDState.dstConnID(), + srcConnID: c.connIDState.srcConnID(), + } + c.w.startProtectedLongHeaderPacket(pnumMaxAcked, p) + c.appendFrames(now, handshakeSpace, pnum, limit) + if sent := c.w.finishProtectedLongHeaderPacket(pnumMaxAcked, k, p); sent != nil { + c.loss.packetSent(now, handshakeSpace, sent) + if c.side == clientSide { + // TODO: Discard the Initial keys. + // https://www.rfc-editor.org/rfc/rfc9001.html#section-4.9.1 + } + } + } + + // 1-RTT packet. + if k := c.tlsState.wkeys[appDataSpace]; k.isSet() { + pnumMaxAcked := c.acks[appDataSpace].largestSeen() + pnum := c.loss.nextNumber(appDataSpace) + dstConnID := c.connIDState.dstConnID() + c.w.start1RTTPacket(pnum, pnumMaxAcked, dstConnID) + c.appendFrames(now, appDataSpace, pnum, limit) + if pad && len(c.w.payload()) > 0 { + // 1-RTT packets have no length field and extend to the end + // of the datagram, so if we're sending a datagram that needs + // padding we need to add it inside the 1-RTT packet. + c.w.appendPaddingTo(minimumClientInitialDatagramSize) + pad = false + } + if sent := c.w.finish1RTTPacket(pnum, pnumMaxAcked, dstConnID, k); sent != nil { + c.loss.packetSent(now, appDataSpace, sent) + } + } + + buf := c.w.datagram() + if len(buf) == 0 { + if limit == ccOK { + // We have nothing to send, and congestion control does not + // block sending. The congestion window is underutilized. + c.loss.cc.setUnderutilized(true) + } + return next + } + + if sentInitial != nil { + if pad { + // Pad out the datagram with zeros, coalescing the Initial + // packet with invalid packets that will be ignored by the peer. + // https://www.rfc-editor.org/rfc/rfc9000.html#section-14.1-1 + for len(buf) < minimumClientInitialDatagramSize { + buf = append(buf, 0) + // Technically this padding isn't in any packet, but + // account it to the Initial packet in this datagram + // for purposes of flow control and loss recovery. + sentInitial.size++ + sentInitial.inFlight = true + } + } + if k := c.tlsState.wkeys[initialSpace]; k.isSet() { + c.loss.packetSent(now, initialSpace, sentInitial) + } + } + + c.listener.sendDatagram(buf, c.peerAddr) + } +} + +func (c *Conn) appendFrames(now time.Time, space numberSpace, pnum packetNumber, limit ccLimit) { + shouldSendAck := c.acks[space].shouldSendAck(now) + if limit != ccOK { + // ACKs are not limited by congestion control. + if shouldSendAck && c.appendAckFrame(now, space) { + c.acks[space].sentAck() + } + return + } + // We want to send an ACK frame if the ack controller wants to send a frame now, + // OR if we are sending a packet anyway and have ack-eliciting packets which we + // have not yet acked. + // + // We speculatively add ACK frames here, to put them at the front of the packet + // to avoid truncation. + // + // After adding all frames, if we don't need to send an ACK frame and have not + // added any other frames, we abandon the packet. + if c.appendAckFrame(now, space) { + defer func() { + // All frames other than ACK and PADDING are ack-eliciting, + // so if the packet is ack-eliciting we've added additional + // frames to it. + if shouldSendAck || c.w.sent.ackEliciting { + // Either we are willing to send an ACK-only packet, + // or we've added additional frames. + c.acks[space].sentAck() + } else { + // There's nothing in this packet but ACK frames, and + // we don't want to send an ACK-only packet at this time. + // Abandoning the packet means we wrote an ACK frame for + // nothing, but constructing the frame is cheap. + c.w.abandonPacket() + } + }() + } + if limit != ccOK { + return + } + pto := c.loss.ptoExpired + + // TODO: Add all the other frames we can send. + + // Test-only PING frames. + if space == c.testSendPingSpace && c.testSendPing.shouldSendPTO(pto) { + if !c.w.appendPingFrame() { + return + } + c.testSendPing.setSent(pnum) + } + + // If this is a PTO probe and we haven't added an ack-eliciting frame yet, + // add a PING to make this an ack-eliciting probe. + // + // Technically, there are separate PTO timers for each number space. + // When a PTO timer expires, we MUST send an ack-eliciting packet in the + // timer's space. We SHOULD send ack-eliciting packets in every other space + // with in-flight data. (RFC 9002, section 6.2.4) + // + // What we actually do is send a single datagram containing an ack-eliciting packet + // for every space for which we have keys. + // + // We fill the PTO probe packets with new or unacknowledged data. For example, + // a PTO probe sent for the Initial space will generally retransmit previously + // sent but unacknowledged CRYPTO data. + // + // When sending a PTO probe datagram containing multiple packets, it is + // possible that an earlier packet will fill up the datagram, leaving no + // space for the remaining probe packet(s). This is not a problem in practice. + // + // A client discards Initial keys when it first sends a Handshake packet + // (RFC 9001 Section 4.9.1). Handshake keys are discarded when the handshake + // is confirmed (RFC 9001 Section 4.9.2). The PTO timer is not set for the + // Application Data packet number space until the handshake is confirmed + // (RFC 9002 Section 6.2.1). Therefore, the only times a PTO probe can fire + // while data for multiple spaces is in flight are: + // + // - a server's Initial or Handshake timers can fire while Initial and Handshake + // data is in flight; and + // + // - a client's Handshake timer can fire while Handshake and Application Data + // data is in flight. + // + // It is theoretically possible for a server's Initial CRYPTO data to overflow + // the maximum datagram size, but unlikely in practice; this space contains + // only the ServerHello TLS message, which is small. It's also unlikely that + // the Handshake PTO probe will fire while Initial data is in flight (this + // requires not just that the Initial CRYPTO data completely fill a datagram, + // but a quite specific arrangement of lost and retransmitted packets.) + // We don't bother worrying about this case here, since the worst case is + // that we send a PTO probe for the in-flight Initial data and drop the + // Handshake probe. + // + // If a client's Handshake PTO timer fires while Application Data data is in + // flight, it is possible that the resent Handshake CRYPTO data will crowd + // out the probe for the Application Data space. However, since this probe is + // optional (recall that the Application Data PTO timer is never set until + // after Handshake keys have been discarded), dropping it is acceptable. + if pto && !c.w.sent.ackEliciting { + c.w.appendPingFrame() + } +} + +func (c *Conn) appendAckFrame(now time.Time, space numberSpace) bool { + seen, delay := c.acks[space].acksToSend(now) + if len(seen) == 0 { + return false + } + d := unscaledAckDelayFromDuration(delay, ackDelayExponent) + return c.w.appendAckFrame(seen, d) +} diff --git a/internal/quic/conn_test.go b/internal/quic/conn_test.go index a1709958e1..6bb12e210e 100644 --- a/internal/quic/conn_test.go +++ b/internal/quic/conn_test.go @@ -7,7 +7,12 @@ package quic import ( + "errors" + "fmt" "math" + "net/netip" + "reflect" + "strings" "testing" "time" ) @@ -46,6 +51,52 @@ func TestConnTestConn(t *testing.T) { } } +type testDatagram struct { + packets []*testPacket + paddedSize int +} + +func (d testDatagram) String() string { + var b strings.Builder + fmt.Fprintf(&b, "datagram with %v packets", len(d.packets)) + if d.paddedSize > 0 { + fmt.Fprintf(&b, " (padded to %v bytes)", d.paddedSize) + } + b.WriteString(":") + for _, p := range d.packets { + b.WriteString("\n") + b.WriteString(p.String()) + } + return b.String() +} + +type testPacket struct { + ptype packetType + version uint32 + num packetNumber + dstConnID []byte + srcConnID []byte + frames []debugFrame +} + +func (p testPacket) String() string { + var b strings.Builder + fmt.Fprintf(&b, " %v %v", p.ptype, p.num) + if p.version != 0 { + fmt.Fprintf(&b, " version=%v", p.version) + } + if p.srcConnID != nil { + fmt.Fprintf(&b, " src={%x}", p.srcConnID) + } + if p.dstConnID != nil { + fmt.Fprintf(&b, " dst={%x}", p.dstConnID) + } + for _, f := range p.frames { + fmt.Fprintf(&b, "\n %v", f) + } + return b.String() +} + // A testConn is a Conn whose external interactions (sending and receiving packets, // setting timers) can be manipulated in tests. type testConn struct { @@ -55,6 +106,30 @@ type testConn struct { timer time.Time timerLastFired time.Time idlec chan struct{} // only accessed on the conn's loop + + // Read and write keys are distinct from the conn's keys, + // because the test may know about keys before the conn does. + // For example, when sending a datagram with coalesced + // Initial and Handshake packets to a client conn, + // we use Handshake keys to encrypt the packet. + // The client only acquires those keys when it processes + // the Initial packet. + rkeys [numberSpaceCount]keys // for packets sent to the conn + wkeys [numberSpaceCount]keys // for packets sent by the conn + + // Information about the conn's (fake) peer. + peerConnID []byte // source conn id of peer's packets + peerNextPacketNum [numberSpaceCount]packetNumber // next packet number to use + + // Datagrams, packets, and frames sent by the conn, + // but not yet processed by the test. + sentDatagrams [][]byte + sentPackets []*testPacket + sentFrames []debugFrame + sentFramePacketType packetType + + // Frame types to ignore in tests. + ignoreFrames map[byte]bool } // newTestConn creates a Conn for testing. @@ -65,17 +140,41 @@ type testConn struct { func newTestConn(t *testing.T, side connSide) *testConn { t.Helper() tc := &testConn{ - t: t, - now: time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC), + t: t, + now: time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC), + peerConnID: []byte{0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5}, + ignoreFrames: map[byte]bool{ + frameTypePadding: true, // ignore PADDING by default + }, } t.Cleanup(tc.cleanup) - conn, err := newConn(tc.now, (*testConnHooks)(tc)) + var initialConnID []byte + if side == serverSide { + // The initial connection ID for the server is chosen by the client. + // When creating a server-side connection, pick a random connection ID here. + var err error + initialConnID, err = newRandomConnID() + if err != nil { + tc.t.Fatal(err) + } + } + + conn, err := newConn( + tc.now, + side, + initialConnID, + netip.MustParseAddrPort("127.0.0.1:443"), + (*testConnListener)(tc), + (*testConnHooks)(tc)) if err != nil { tc.t.Fatal(err) } tc.conn = conn + tc.wkeys[initialSpace] = conn.tlsState.wkeys[initialSpace] + tc.rkeys[initialSpace] = conn.tlsState.rkeys[initialSpace] + tc.wait() return tc } @@ -108,6 +207,16 @@ func (tc *testConn) advanceToTimer() { tc.advanceTo(tc.timer) } +func (tc *testConn) timerDelay() time.Duration { + if tc.timer.IsZero() { + return math.MaxInt64 // infinite + } + if tc.timer.Before(tc.now) { + return 0 + } + return tc.timer.Sub(tc.now) +} + const infiniteDuration = time.Duration(math.MaxInt64) // timeUntilEvent returns the amount of time until the next connection event. @@ -155,6 +264,277 @@ func (tc *testConn) cleanup() { tc.conn.exit() } +// write sends the Conn a datagram. +func (tc *testConn) write(d *testDatagram) { + tc.t.Helper() + var buf []byte + for _, p := range d.packets { + space := spaceForPacketType(p.ptype) + if p.num >= tc.peerNextPacketNum[space] { + tc.peerNextPacketNum[space] = p.num + 1 + } + buf = append(buf, tc.encodeTestPacket(p)...) + } + for len(buf) < d.paddedSize { + buf = append(buf, 0) + } + tc.conn.sendMsg(&datagram{ + b: buf, + }) + tc.wait() +} + +// writeFrame sends the Conn a datagram containing the given frames. +func (tc *testConn) writeFrames(ptype packetType, frames ...debugFrame) { + tc.t.Helper() + space := spaceForPacketType(ptype) + dstConnID := tc.conn.connIDState.local[0].cid + if tc.conn.connIDState.local[0].seq == -1 && ptype != packetTypeInitial { + // Only use the transient connection ID in Initial packets. + dstConnID = tc.conn.connIDState.local[1].cid + } + d := &testDatagram{ + packets: []*testPacket{{ + ptype: ptype, + num: tc.peerNextPacketNum[space], + frames: frames, + version: 1, + dstConnID: dstConnID, + srcConnID: tc.peerConnID, + }}, + } + if ptype == packetTypeInitial && tc.conn.side == serverSide { + d.paddedSize = 1200 + } + tc.write(d) +} + +// ignoreFrame hides frames of the given type sent by the Conn. +func (tc *testConn) ignoreFrame(frameType byte) { + tc.ignoreFrames[frameType] = true +} + +// readDatagram reads the next datagram sent by the Conn. +// It returns nil if the Conn has no more datagrams to send at this time. +func (tc *testConn) readDatagram() *testDatagram { + tc.t.Helper() + tc.wait() + tc.sentPackets = nil + tc.sentFrames = nil + if len(tc.sentDatagrams) == 0 { + return nil + } + buf := tc.sentDatagrams[0] + tc.sentDatagrams = tc.sentDatagrams[1:] + return tc.parseTestDatagram(buf) +} + +// readPacket reads the next packet sent by the Conn. +// It returns nil if the Conn has no more packets to send at this time. +func (tc *testConn) readPacket() *testPacket { + tc.t.Helper() + for len(tc.sentPackets) == 0 { + d := tc.readDatagram() + if d == nil { + return nil + } + tc.sentPackets = d.packets + } + p := tc.sentPackets[0] + tc.sentPackets = tc.sentPackets[1:] + return p +} + +// readFrame reads the next frame sent by the Conn. +// It returns nil if the Conn has no more frames to send at this time. +func (tc *testConn) readFrame() (debugFrame, packetType) { + tc.t.Helper() + for len(tc.sentFrames) == 0 { + p := tc.readPacket() + if p == nil { + return nil, packetTypeInvalid + } + tc.sentFramePacketType = p.ptype + tc.sentFrames = p.frames + } + f := tc.sentFrames[0] + tc.sentFrames = tc.sentFrames[1:] + return f, tc.sentFramePacketType +} + +// wantDatagram indicates that we expect the Conn to send a datagram. +func (tc *testConn) wantDatagram(expectation string, want *testDatagram) { + tc.t.Helper() + got := tc.readDatagram() + if !reflect.DeepEqual(got, want) { + tc.t.Fatalf("%v:\ngot datagram: %v\nwant datagram: %v", expectation, got, want) + } +} + +// wantPacket indicates that we expect the Conn to send a packet. +func (tc *testConn) wantPacket(expectation string, want *testPacket) { + tc.t.Helper() + got := tc.readPacket() + if !reflect.DeepEqual(got, want) { + tc.t.Fatalf("%v:\ngot packet: %v\nwant packet: %v", expectation, got, want) + } +} + +// wantFrame indicates that we expect the Conn to send a frame. +func (tc *testConn) wantFrame(expectation string, wantType packetType, want debugFrame) { + tc.t.Helper() + got, gotType := tc.readFrame() + if got == nil { + tc.t.Fatalf("%v:\nconnection is idle\nwant %v frame: %v", expectation, wantType, want) + } + if gotType != wantType { + tc.t.Fatalf("%v:\ngot %v packet, want %v", expectation, wantType, want) + } + if !reflect.DeepEqual(got, want) { + tc.t.Fatalf("%v:\ngot frame: %v\nwant frame: %v", expectation, got, want) + } +} + +// wantIdle indicates that we expect the Conn to not send any more frames. +func (tc *testConn) wantIdle(expectation string) { + tc.t.Helper() + switch { + case len(tc.sentFrames) > 0: + tc.t.Fatalf("expect: %v\nunexpectedly got: %v", expectation, tc.sentFrames[0]) + case len(tc.sentPackets) > 0: + tc.t.Fatalf("expect: %v\nunexpectedly got: %v", expectation, tc.sentPackets[0]) + } + if f, _ := tc.readFrame(); f != nil { + tc.t.Fatalf("expect: %v\nunexpectedly got: %v", expectation, f) + } +} + +func (tc *testConn) encodeTestPacket(p *testPacket) []byte { + tc.t.Helper() + var w packetWriter + w.reset(1200) + var pnumMaxAcked packetNumber + if p.ptype != packetType1RTT { + w.startProtectedLongHeaderPacket(pnumMaxAcked, longPacket{ + ptype: p.ptype, + version: p.version, + num: p.num, + dstConnID: p.dstConnID, + srcConnID: p.srcConnID, + }) + } else { + w.start1RTTPacket(p.num, pnumMaxAcked, p.dstConnID) + } + for _, f := range p.frames { + f.write(&w) + } + space := spaceForPacketType(p.ptype) + if !tc.rkeys[space].isSet() { + tc.t.Fatalf("sending packet with no %v keys available", space) + return nil + } + if p.ptype != packetType1RTT { + w.finishProtectedLongHeaderPacket(pnumMaxAcked, tc.rkeys[space], longPacket{ + ptype: p.ptype, + version: p.version, + num: p.num, + dstConnID: p.dstConnID, + srcConnID: p.srcConnID, + }) + } else { + w.finish1RTTPacket(p.num, pnumMaxAcked, p.dstConnID, tc.rkeys[space]) + } + return w.datagram() +} + +func (tc *testConn) parseTestDatagram(buf []byte) *testDatagram { + tc.t.Helper() + bufSize := len(buf) + d := &testDatagram{} + for len(buf) > 0 { + if buf[0] == 0 { + d.paddedSize = bufSize + break + } + ptype := getPacketType(buf) + space := spaceForPacketType(ptype) + if !tc.wkeys[space].isSet() { + tc.t.Fatalf("no keys for space %v, packet type %v", space, ptype) + } + if isLongHeader(buf[0]) { + var pnumMax packetNumber // TODO: Track packet numbers. + p, n := parseLongHeaderPacket(buf, tc.wkeys[space], pnumMax) + if n < 0 { + tc.t.Fatalf("packet parse error") + } + frames, err := tc.parseTestFrames(p.payload) + if err != nil { + tc.t.Fatal(err) + } + d.packets = append(d.packets, &testPacket{ + ptype: p.ptype, + version: p.version, + num: p.num, + dstConnID: p.dstConnID, + srcConnID: p.srcConnID, + frames: frames, + }) + buf = buf[n:] + } else { + var pnumMax packetNumber // TODO: Track packet numbers. + p, n := parse1RTTPacket(buf, tc.wkeys[space], len(tc.peerConnID), pnumMax) + if n < 0 { + tc.t.Fatalf("packet parse error") + } + dstConnID, _ := dstConnIDForDatagram(buf) + frames, err := tc.parseTestFrames(p.payload) + if err != nil { + tc.t.Fatal(err) + } + d.packets = append(d.packets, &testPacket{ + ptype: packetType1RTT, + num: p.num, + dstConnID: dstConnID, + frames: frames, + }) + buf = buf[n:] + } + } + return d +} + +func (tc *testConn) parseTestFrames(payload []byte) ([]debugFrame, error) { + tc.t.Helper() + var frames []debugFrame + for len(payload) > 0 { + f, n := parseDebugFrame(payload) + if n < 0 { + return nil, errors.New("error parsing frames") + } + if !tc.ignoreFrames[payload[0]] { + frames = append(frames, f) + } + payload = payload[n:] + } + return frames, nil +} + +func spaceForPacketType(ptype packetType) numberSpace { + switch ptype { + case packetTypeInitial: + return initialSpace + case packetType0RTT: + panic("TODO: packetType0RTT") + case packetTypeHandshake: + return handshakeSpace + case packetTypeRetry: + panic("TODO: packetTypeRetry") + case packetType1RTT: + return appDataSpace + } + panic("unknown packet type") +} + // testConnHooks implements connTestHooks. type testConnHooks testConn @@ -186,3 +566,11 @@ func (tc *testConnHooks) nextMessage(msgc chan any, timer time.Time) (now time.T m = <-msgc return tc.now, m } + +// testConnListener implements connListener. +type testConnListener testConn + +func (tc *testConnListener) sendDatagram(p []byte, addr netip.AddrPort) error { + tc.sentDatagrams = append(tc.sentDatagrams, append([]byte(nil), p...)) + return nil +} diff --git a/internal/quic/dgram.go b/internal/quic/dgram.go new file mode 100644 index 0000000000..79e6650fa4 --- /dev/null +++ b/internal/quic/dgram.go @@ -0,0 +1,38 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +import ( + "net/netip" + "sync" +) + +type datagram struct { + b []byte + addr netip.AddrPort +} + +var datagramPool = sync.Pool{ + New: func() any { + return &datagram{ + b: make([]byte, maxUDPPayloadSize), + } + }, +} + +func newDatagram() *datagram { + m := datagramPool.Get().(*datagram) + m.b = m.b[:cap(m.b)] + return m +} + +func (m *datagram) recycle() { + if cap(m.b) != maxUDPPayloadSize { + return + } + datagramPool.Put(m) +} diff --git a/internal/quic/frame_debug.go b/internal/quic/frame_debug.go index 945bb9d1f7..3009a04507 100644 --- a/internal/quic/frame_debug.go +++ b/internal/quic/frame_debug.go @@ -120,7 +120,7 @@ type debugFrameAck struct { func parseDebugFrameAck(b []byte) (f debugFrameAck, n int) { f.ranges = nil - _, f.ackDelay, n = consumeAckFrame(b, func(start, end packetNumber) { + _, f.ackDelay, n = consumeAckFrame(b, func(_ int, start, end packetNumber) { f.ranges = append(f.ranges, i64range[packetNumber]{ start: start, end: end, diff --git a/internal/quic/packet_parser.go b/internal/quic/packet_parser.go index 908a82ed90..c22f031038 100644 --- a/internal/quic/packet_parser.go +++ b/internal/quic/packet_parser.go @@ -91,7 +91,7 @@ func parseLongHeaderPacket(pkt []byte, k keys, pnumMax packetNumber) (p longPack pnumOff := len(pkt) - len(b) pkt = pkt[:pnumOff+int(payLen)] - if k.initialized() { + if k.isSet() { var err error p.payload, p.num, err = k.unprotect(pkt, pnumOff, pnumMax) if err != nil { @@ -162,7 +162,7 @@ func parse1RTTPacket(pkt []byte, k keys, dstConnIDLen int, pnumMax packetNumber) // which includes both general parse failures and specific violations of frame // constraints. -func consumeAckFrame(frame []byte, f func(start, end packetNumber)) (largest packetNumber, ackDelay unscaledAckDelay, n int) { +func consumeAckFrame(frame []byte, f func(rangeIndex int, start, end packetNumber)) (largest packetNumber, ackDelay unscaledAckDelay, n int) { b := frame[1:] // type largestAck, n := consumeVarint(b) @@ -195,7 +195,7 @@ func consumeAckFrame(frame []byte, f func(start, end packetNumber)) (largest pac if rangeMin < 0 || rangeMin > rangeMax { return 0, 0, -1 } - f(rangeMin, rangeMax+1) + f(int(i), rangeMin, rangeMax+1) if i == ackRangeCount { break diff --git a/internal/quic/packet_protection.go b/internal/quic/packet_protection.go index 1f0a735e8e..18470536ff 100644 --- a/internal/quic/packet_protection.go +++ b/internal/quic/packet_protection.go @@ -163,8 +163,8 @@ func (k keys) xorIV(pnum packetNumber) { k.iv[len(k.iv)-1] ^= uint8(pnum) } -// initialized returns true if valid keys are available. -func (k keys) initialized() bool { +// isSet returns true if valid keys are available. +func (k keys) isSet() bool { return k.aead != nil } diff --git a/internal/quic/packet_test.go b/internal/quic/packet_test.go index b13a587e54..f3a8b7d570 100644 --- a/internal/quic/packet_test.go +++ b/internal/quic/packet_test.go @@ -9,10 +9,27 @@ package quic import ( "bytes" "encoding/hex" + "fmt" "strings" "testing" ) +func (p packetType) String() string { + switch p { + case packetTypeInitial: + return "Initial" + case packetType0RTT: + return "0-RTT" + case packetTypeHandshake: + return "Handshake" + case packetTypeRetry: + return "Retry" + case packetType1RTT: + return "1-RTT" + } + return fmt.Sprintf("unknown packet type %v", byte(p)) +} + func TestPacketHeader(t *testing.T) { for _, test := range []struct { name string diff --git a/internal/quic/packet_writer.go b/internal/quic/packet_writer.go index 97987e0c2f..6c4c452cdd 100644 --- a/internal/quic/packet_writer.go +++ b/internal/quic/packet_writer.go @@ -237,7 +237,10 @@ func (w *packetWriter) appendPingFrame() (added bool) { return false } w.b = append(w.b, frameTypePing) - w.sent.appendAckElicitingFrame(frameTypePing) + // Mark this packet as ack-eliciting and in-flight, + // but there's no need to record the presence of a PING frame in it. + w.sent.ackEliciting = true + w.sent.inFlight = true return true } diff --git a/internal/quic/ping.go b/internal/quic/ping.go new file mode 100644 index 0000000000..3e7d9c51bd --- /dev/null +++ b/internal/quic/ping.go @@ -0,0 +1,16 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +import "time" + +func (c *Conn) ping(space numberSpace) { + c.sendMsg(func(now time.Time, c *Conn) { + c.testSendPing.setUnsent() + c.testSendPingSpace = space + }) +} diff --git a/internal/quic/ping_test.go b/internal/quic/ping_test.go new file mode 100644 index 0000000000..4a732ed543 --- /dev/null +++ b/internal/quic/ping_test.go @@ -0,0 +1,35 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +import "testing" + +func TestPing(t *testing.T) { + tc := newTestConn(t, clientSide) + tc.conn.ping(initialSpace) + tc.wantFrame("connection should send a PING frame", + packetTypeInitial, debugFramePing{}) + + tc.advanceToTimer() + tc.wantFrame("on PTO, connection should send another PING frame", + packetTypeInitial, debugFramePing{}) + + tc.wantIdle("after sending PTO probe, no additional frames to send") +} + +func TestAck(t *testing.T) { + tc := newTestConn(t, serverSide) + tc.writeFrames(packetTypeInitial, + debugFramePing{}, + ) + tc.wantFrame("connection should respond to ack-eliciting packet with an ACK frame", + packetTypeInitial, + debugFrameAck{ + ranges: []i64range[packetNumber]{{0, 1}}, + }, + ) +} diff --git a/internal/quic/quic.go b/internal/quic/quic.go index c69c0b9840..9df7f7e2b1 100644 --- a/internal/quic/quic.go +++ b/internal/quic/quic.go @@ -41,6 +41,10 @@ const ( // https://www.rfc-editor.org/rfc/rfc9002.html#section-6.1.2-6 const timerGranularity = 1 * time.Millisecond +// Minimum size of a UDP datagram sent by a client carrying an Initial packet. +// https://www.rfc-editor.org/rfc/rfc9000#section-14.1 +const minimumClientInitialDatagramSize = 1200 + // A connSide distinguishes between the client and server sides of a connection. type connSide int8 diff --git a/internal/quic/sent_packet.go b/internal/quic/sent_packet.go index e5a80be3bb..4f11aa1368 100644 --- a/internal/quic/sent_packet.go +++ b/internal/quic/sent_packet.go @@ -29,6 +29,8 @@ type sentPacket struct { // we need to process an ack for or loss of this packet. // For example, a CRYPTO frame is recorded as the frame type (0x06), offset, and length, // but does not include the sent data. + // + // This buffer is written by packetWriter.append* and read by Conn.handleAckOrLoss. b []byte n int // read offset into b } diff --git a/internal/quic/tls.go b/internal/quic/tls.go new file mode 100644 index 0000000000..1cdb727e24 --- /dev/null +++ b/internal/quic/tls.go @@ -0,0 +1,23 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package quic + +// tlsState encapsulates interactions with TLS. +type tlsState struct { + // Encryption keys indexed by number space. + rkeys [numberSpaceCount]keys + wkeys [numberSpaceCount]keys +} + +func (s *tlsState) init(side connSide, initialConnID []byte) { + clientKeys, serverKeys := initialKeys(initialConnID) + if side == clientSide { + s.wkeys[initialSpace], s.rkeys[initialSpace] = clientKeys, serverKeys + } else { + s.wkeys[initialSpace], s.rkeys[initialSpace] = serverKeys, clientKeys + } +}