|
INET Framework for OMNeT++/OMNEST
|
00001 // 00002 // Copyright (C) 2004 Andras Varga 00003 // Copyright (C) 2009-2011 Thomas Reschka 00004 // 00005 // This program is free software; you can redistribute it and/or 00006 // modify it under the terms of the GNU Lesser General Public License 00007 // as published by the Free Software Foundation; either version 2 00008 // of the License, or (at your option) any later version. 00009 // 00010 // This program is distributed in the hope that it will be useful, 00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 // GNU Lesser General Public License for more details. 00014 // 00015 // You should have received a copy of the GNU Lesser General Public License 00016 // along with this program; if not, see <http://www.gnu.org/licenses/>. 00017 // 00018 00019 00020 #include <string.h> 00021 #include <algorithm> // min,max 00022 #include "TCP.h" 00023 #include "TCPConnection.h" 00024 #include "TCPSegment.h" 00025 #include "TCPCommand_m.h" 00026 #include "IPControlInfo.h" 00027 #include "IPv6ControlInfo.h" 00028 #include "TCPSendQueue.h" 00029 #include "TCPSACKRexmitQueue.h" 00030 #include "TCPReceiveQueue.h" 00031 #include "TCPAlgorithm.h" 00032 00033 // 00034 // helper functions 00035 // 00036 00037 const char *TCPConnection::stateName(int state) 00038 { 00039 #define CASE(x) case x: s=#x+6; break 00040 const char *s = "unknown"; 00041 switch (state) 00042 { 00043 CASE(TCP_S_INIT); 00044 CASE(TCP_S_CLOSED); 00045 CASE(TCP_S_LISTEN); 00046 CASE(TCP_S_SYN_SENT); 00047 CASE(TCP_S_SYN_RCVD); 00048 CASE(TCP_S_ESTABLISHED); 00049 CASE(TCP_S_CLOSE_WAIT); 00050 CASE(TCP_S_LAST_ACK); 00051 CASE(TCP_S_FIN_WAIT_1); 00052 CASE(TCP_S_FIN_WAIT_2); 00053 CASE(TCP_S_CLOSING); 00054 CASE(TCP_S_TIME_WAIT); 00055 } 00056 return s; 00057 #undef CASE 00058 } 00059 00060 const char *TCPConnection::eventName(int event) 00061 { 00062 #define CASE(x) case x: s=#x+6; break 00063 const char *s = "unknown"; 00064 switch (event) 00065 { 00066 CASE(TCP_E_IGNORE); 00067 CASE(TCP_E_OPEN_ACTIVE); 00068 CASE(TCP_E_OPEN_PASSIVE); 00069 CASE(TCP_E_SEND); 00070 CASE(TCP_E_CLOSE); 00071 CASE(TCP_E_ABORT); 00072 CASE(TCP_E_STATUS); 00073 CASE(TCP_E_RCV_DATA); 00074 CASE(TCP_E_RCV_ACK); 00075 CASE(TCP_E_RCV_SYN); 00076 CASE(TCP_E_RCV_SYN_ACK); 00077 CASE(TCP_E_RCV_FIN); 00078 CASE(TCP_E_RCV_FIN_ACK); 00079 CASE(TCP_E_RCV_RST); 00080 CASE(TCP_E_RCV_UNEXP_SYN); 00081 CASE(TCP_E_TIMEOUT_2MSL); 00082 CASE(TCP_E_TIMEOUT_CONN_ESTAB); 00083 CASE(TCP_E_TIMEOUT_FIN_WAIT_2); 00084 } 00085 return s; 00086 #undef CASE 00087 } 00088 00089 const char *TCPConnection::indicationName(int code) 00090 { 00091 #define CASE(x) case x: s=#x+6; break 00092 const char *s = "unknown"; 00093 switch (code) 00094 { 00095 CASE(TCP_I_DATA); 00096 CASE(TCP_I_URGENT_DATA); 00097 CASE(TCP_I_ESTABLISHED); 00098 CASE(TCP_I_PEER_CLOSED); 00099 CASE(TCP_I_CLOSED); 00100 CASE(TCP_I_CONNECTION_REFUSED); 00101 CASE(TCP_I_CONNECTION_RESET); 00102 CASE(TCP_I_TIMED_OUT); 00103 CASE(TCP_I_STATUS); 00104 } 00105 return s; 00106 #undef CASE 00107 } 00108 00109 const char *TCPConnection::optionName(int option) 00110 { 00111 switch (option) 00112 { 00113 case TCPOPTION_END_OF_OPTION_LIST: return "EOL"; 00114 case TCPOPTION_NO_OPERATION: return "NOP"; 00115 case TCPOPTION_MAXIMUM_SEGMENT_SIZE: return "MSS"; 00116 case TCPOPTION_WINDOW_SCALE: return "WS"; 00117 case TCPOPTION_SACK_PERMITTED: return "SACK_PERMITTED"; 00118 case TCPOPTION_SACK: return "SACK"; 00119 case TCPOPTION_TIMESTAMP: return "TS"; 00120 default: return "unknown"; 00121 } 00122 } 00123 00124 void TCPConnection::printConnBrief() 00125 { 00126 tcpEV << "Connection "; 00127 tcpEV << localAddr << ":" << localPort << " to " << remoteAddr << ":" << remotePort; 00128 tcpEV << " on app[" << appGateIndex << "],connId=" << connId; 00129 tcpEV << " in " << stateName(fsm.getState()); 00130 tcpEV << " (ptr=0x" << this << ")\n"; 00131 } 00132 00133 void TCPConnection::printSegmentBrief(TCPSegment *tcpseg) 00134 { 00135 tcpEV << "." << tcpseg->getSrcPort() << " > "; 00136 tcpEV << "." << tcpseg->getDestPort() << ": "; 00137 00138 if (tcpseg->getSynBit()) tcpEV << (tcpseg->getAckBit() ? "SYN+ACK " : "SYN "); 00139 if (tcpseg->getFinBit()) tcpEV << "FIN(+ACK) "; 00140 if (tcpseg->getRstBit()) tcpEV << (tcpseg->getAckBit() ? "RST+ACK " : "RST "); 00141 if (tcpseg->getPshBit()) tcpEV << "PSH "; 00142 00143 if (tcpseg->getPayloadLength()>0 || tcpseg->getSynBit()) 00144 { 00145 tcpEV << "[" << tcpseg->getSequenceNo() << ".." << (tcpseg->getSequenceNo()+tcpseg->getPayloadLength()) << ") "; 00146 tcpEV << "(l=" << tcpseg->getPayloadLength() << ") "; 00147 } 00148 if (tcpseg->getAckBit()) tcpEV << "ack " << tcpseg->getAckNo() << " "; 00149 tcpEV << "win " << tcpseg->getWindow() << " "; 00150 if (tcpseg->getUrgBit()) tcpEV << "urg " << tcpseg->getUrgentPointer() << " "; 00151 if (tcpseg->getHeaderLength() > TCP_HEADER_OCTETS) // Header options present? TCP_HEADER_OCTETS = 20 00152 { 00153 tcpEV << "options "; 00154 for (uint i=0; i<tcpseg->getOptionsArraySize(); i++) 00155 { 00156 const TCPOption& option = tcpseg->getOptions(i); 00157 short kind = option.getKind(); 00158 tcpEV << optionName(kind) << " "; 00159 } 00160 } 00161 tcpEV << "\n"; 00162 } 00163 00164 TCPConnection *TCPConnection::cloneListeningConnection() 00165 { 00166 TCPConnection *conn = new TCPConnection(tcpMain,appGateIndex,connId); 00167 00168 // following code to be kept consistent with initConnection() 00169 const char *sendQueueClass = sendQueue->getClassName(); 00170 conn->sendQueue = check_and_cast<TCPSendQueue *>(createOne(sendQueueClass)); 00171 conn->sendQueue->setConnection(conn); 00172 00173 const char *receiveQueueClass = receiveQueue->getClassName(); 00174 conn->receiveQueue = check_and_cast<TCPReceiveQueue *>(createOne(receiveQueueClass)); 00175 conn->receiveQueue->setConnection(conn); 00176 00177 // create SACK retransmit queue 00178 rexmitQueue = new TCPSACKRexmitQueue(); 00179 rexmitQueue->setConnection(this); 00180 00181 const char *tcpAlgorithmClass = tcpAlgorithm->getClassName(); 00182 conn->tcpAlgorithm = check_and_cast<TCPAlgorithm *>(createOne(tcpAlgorithmClass)); 00183 conn->tcpAlgorithm->setConnection(conn); 00184 00185 conn->state = conn->tcpAlgorithm->getStateVariables(); 00186 configureStateVariables(); 00187 conn->tcpAlgorithm->initialize(); 00188 00189 // put it into LISTEN, with our localAddr/localPort 00190 conn->state->active = false; 00191 conn->state->fork = true; 00192 conn->localAddr = localAddr; 00193 conn->localPort = localPort; 00194 FSM_Goto(conn->fsm, TCP_S_LISTEN); 00195 00196 return conn; 00197 } 00198 00199 void TCPConnection::sendToIP(TCPSegment *tcpseg) 00200 { 00201 // record seq (only if we do send data) and ackno 00202 if (sndNxtVector && tcpseg->getPayloadLength()!=0) 00203 sndNxtVector->record(tcpseg->getSequenceNo()); 00204 if (sndAckVector) 00205 sndAckVector->record(tcpseg->getAckNo()); 00206 00207 // final touches on the segment before sending 00208 tcpseg->setSrcPort(localPort); 00209 tcpseg->setDestPort(remotePort); 00210 ASSERT(tcpseg->getHeaderLength() >= TCP_HEADER_OCTETS); // TCP_HEADER_OCTETS = 20 (without options) 00211 ASSERT(tcpseg->getHeaderLength() <= TCP_MAX_HEADER_OCTETS); // TCP_MAX_HEADER_OCTETS = 60 00212 tcpseg->setByteLength(tcpseg->getHeaderLength() + tcpseg->getPayloadLength()); 00213 state->sentBytes = tcpseg->getPayloadLength(); // resetting sentBytes to 0 if sending a segment without data (e.g. ACK) 00214 00215 tcpEV << "Sending: "; 00216 printSegmentBrief(tcpseg); 00217 00218 // TBD reuse next function for sending 00219 00220 if (!remoteAddr.isIPv6()) 00221 { 00222 // send over IPv4 00223 IPControlInfo *controlInfo = new IPControlInfo(); 00224 controlInfo->setProtocol(IP_PROT_TCP); 00225 controlInfo->setSrcAddr(localAddr.get4()); 00226 controlInfo->setDestAddr(remoteAddr.get4()); 00227 tcpseg->setControlInfo(controlInfo); 00228 00229 tcpMain->send(tcpseg,"ipOut"); 00230 } 00231 else 00232 { 00233 // send over IPv6 00234 IPv6ControlInfo *controlInfo = new IPv6ControlInfo(); 00235 controlInfo->setProtocol(IP_PROT_TCP); 00236 controlInfo->setSrcAddr(localAddr.get6()); 00237 controlInfo->setDestAddr(remoteAddr.get6()); 00238 tcpseg->setControlInfo(controlInfo); 00239 00240 tcpMain->send(tcpseg,"ipv6Out"); 00241 } 00242 } 00243 00244 void TCPConnection::sendToIP(TCPSegment *tcpseg, IPvXAddress src, IPvXAddress dest) 00245 { 00246 tcpEV << "Sending: "; 00247 printSegmentBrief(tcpseg); 00248 00249 if (!dest.isIPv6()) 00250 { 00251 // send over IPv4 00252 IPControlInfo *controlInfo = new IPControlInfo(); 00253 controlInfo->setProtocol(IP_PROT_TCP); 00254 controlInfo->setSrcAddr(src.get4()); 00255 controlInfo->setDestAddr(dest.get4()); 00256 tcpseg->setControlInfo(controlInfo); 00257 00258 check_and_cast<TCP *>(simulation.getContextModule())->send(tcpseg,"ipOut"); 00259 } 00260 else 00261 { 00262 // send over IPv6 00263 IPv6ControlInfo *controlInfo = new IPv6ControlInfo(); 00264 controlInfo->setProtocol(IP_PROT_TCP); 00265 controlInfo->setSrcAddr(src.get6()); 00266 controlInfo->setDestAddr(dest.get6()); 00267 tcpseg->setControlInfo(controlInfo); 00268 00269 check_and_cast<TCP *>(simulation.getContextModule())->send(tcpseg,"ipv6Out"); 00270 } 00271 } 00272 00273 TCPSegment *TCPConnection::createTCPSegment(const char *name) 00274 { 00275 return new TCPSegment(name); 00276 } 00277 00278 void TCPConnection::signalConnectionTimeout() 00279 { 00280 sendIndicationToApp(TCP_I_TIMED_OUT); 00281 } 00282 00283 void TCPConnection::sendIndicationToApp(int code) 00284 { 00285 tcpEV << "Notifying app: " << indicationName(code) << "\n"; 00286 cMessage *msg = new cMessage(indicationName(code)); 00287 msg->setKind(code); 00288 TCPCommand *ind = new TCPCommand(); 00289 ind->setConnId(connId); 00290 msg->setControlInfo(ind); 00291 tcpMain->send(msg, "appOut", appGateIndex); 00292 } 00293 00294 void TCPConnection::sendEstabIndicationToApp() 00295 { 00296 tcpEV << "Notifying app: " << indicationName(TCP_I_ESTABLISHED) << "\n"; 00297 cMessage *msg = new cMessage(indicationName(TCP_I_ESTABLISHED)); 00298 msg->setKind(TCP_I_ESTABLISHED); 00299 00300 TCPConnectInfo *ind = new TCPConnectInfo(); 00301 ind->setConnId(connId); 00302 ind->setLocalAddr(localAddr); 00303 ind->setRemoteAddr(remoteAddr); 00304 ind->setLocalPort(localPort); 00305 ind->setRemotePort(remotePort); 00306 00307 msg->setControlInfo(ind); 00308 tcpMain->send(msg, "appOut", appGateIndex); 00309 } 00310 00311 void TCPConnection::sendToApp(cMessage *msg) 00312 { 00313 tcpMain->send(msg, "appOut", appGateIndex); 00314 } 00315 00316 void TCPConnection::initConnection(TCPOpenCommand *openCmd) 00317 { 00318 // create send queue 00319 const char *sendQueueClass = openCmd->getSendQueueClass(); 00320 if (!sendQueueClass || !sendQueueClass[0]) 00321 sendQueueClass = tcpMain->par("sendQueueClass"); 00322 sendQueue = check_and_cast<TCPSendQueue *>(createOne(sendQueueClass)); 00323 sendQueue->setConnection(this); 00324 00325 // create receive queue 00326 const char *receiveQueueClass = openCmd->getReceiveQueueClass(); 00327 if (!receiveQueueClass || !receiveQueueClass[0]) 00328 receiveQueueClass = tcpMain->par("receiveQueueClass"); 00329 receiveQueue = check_and_cast<TCPReceiveQueue *>(createOne(receiveQueueClass)); 00330 receiveQueue->setConnection(this); 00331 00332 // create SACK retransmit queue 00333 rexmitQueue = new TCPSACKRexmitQueue(); 00334 rexmitQueue->setConnection(this); 00335 00336 // create algorithm 00337 const char *tcpAlgorithmClass = openCmd->getTcpAlgorithmClass(); 00338 if (!tcpAlgorithmClass || !tcpAlgorithmClass[0]) 00339 tcpAlgorithmClass = tcpMain->par("tcpAlgorithmClass"); 00340 tcpAlgorithm = check_and_cast<TCPAlgorithm *>(createOne(tcpAlgorithmClass)); 00341 tcpAlgorithm->setConnection(this); 00342 00343 // create state block 00344 state = tcpAlgorithm->getStateVariables(); 00345 configureStateVariables(); 00346 tcpAlgorithm->initialize(); 00347 } 00348 00349 void TCPConnection::configureStateVariables() 00350 { 00351 long advertisedWindowPar = tcpMain->par("advertisedWindow").longValue(); 00352 state->ws_support = tcpMain->par("windowScalingSupport"); // if set, this means that current host supports WS (RFC 1323) 00353 if (!state->ws_support && (advertisedWindowPar > TCP_MAX_WIN || advertisedWindowPar <= 0)) 00354 throw cRuntimeError("Invalid advertisedWindow parameter: %ld", advertisedWindowPar); 00355 state->rcv_wnd = advertisedWindowPar; 00356 state->rcv_adv = advertisedWindowPar; 00357 if (state->ws_support && advertisedWindowPar > TCP_MAX_WIN) 00358 { 00359 state->rcv_wnd = TCP_MAX_WIN; // we cannot to guarantee that the other end is also supporting the Window Scale (header option) (RFC 1322) 00360 state->rcv_adv = TCP_MAX_WIN; // therefore TCP_MAX_WIN is used as initial value for rcv_wnd and rcv_adv 00361 } 00362 state->maxRcvBuffer = advertisedWindowPar; 00363 state->delayed_acks_enabled = tcpMain->par("delayedAcksEnabled"); // delayed ACK algorithm (RFC 1122) enabled/disabled 00364 state->nagle_enabled = tcpMain->par("nagleEnabled"); // Nagle's algorithm (RFC 896) enabled/disabled 00365 state->limited_transmit_enabled = tcpMain->par("limitedTransmitEnabled"); // Limited Transmit algorithm (RFC 3042) enabled/disabled 00366 state->increased_IW_enabled = tcpMain->par("increasedIWEnabled"); // Increased Initial Window (RFC 3390) enabled/disabled 00367 state->snd_mss = tcpMain->par("mss").longValue(); // Maximum Segment Size (RFC 793) 00368 state->ts_support = tcpMain->par("timestampSupport"); // if set, this means that current host supports TS (RFC 1323) 00369 state->sack_support = tcpMain->par("sackSupport"); // if set, this means that current host supports SACK (RFC 2018, 2883, 3517) 00370 if (state->sack_support) 00371 { 00372 std::string algorithmName1 = "TCPReno"; 00373 std::string algorithmName2 = tcpMain->par("tcpAlgorithmClass"); 00374 if (algorithmName1!=algorithmName2) // TODO add additional checks for new SACK supporting algorithms here once they are implemented 00375 { 00376 EV << "If you want to use TCP SACK please set tcpAlgorithmClass to TCPReno" << endl; 00377 ASSERT(false); 00378 } 00379 } 00380 } 00381 00382 void TCPConnection::selectInitialSeqNum() 00383 { 00384 // set the initial send sequence number 00385 state->iss = (unsigned long)fmod(SIMTIME_DBL(simTime())*250000.0, 1.0+(double)(unsigned)0xffffffffUL) & 0xffffffffUL; 00386 00387 state->snd_una = state->snd_nxt = state->snd_max = state->iss; 00388 00389 sendQueue->init(state->iss+1); // +1 is for SYN 00390 rexmitQueue->init(state->iss + 1); // +1 is for SYN 00391 } 00392 00393 bool TCPConnection::isSegmentAcceptable(TCPSegment *tcpseg) 00394 { 00395 // check that segment entirely falls in receive window 00396 // RFC 793, page 69: 00397 // "There are four cases for the acceptability test for an incoming segment:" 00398 uint32 len = tcpseg->getPayloadLength(); 00399 uint32 seqNo = tcpseg->getSequenceNo(); 00400 bool ret; 00401 00402 if (len == 0) 00403 { 00404 if (state->rcv_wnd == 0) 00405 ret = (seqNo == state->rcv_nxt); 00406 else // rcv_wnd > 0 00407 ret = seqLE(state->rcv_nxt, seqNo) && seqLess(seqNo, state->rcv_nxt + state->rcv_wnd); 00408 } 00409 else // len > 0 00410 { 00411 if (state->rcv_wnd == 0) 00412 ret = false; 00413 else // rcv_wnd > 0 00414 ret = (seqLE(state->rcv_nxt, seqNo) && seqLess(seqNo, state->rcv_nxt + state->rcv_wnd)) 00415 || 00416 (seqLE(state->rcv_nxt, seqNo + len - 1) && seqLess(seqNo + len - 1, state->rcv_nxt + state->rcv_wnd)); 00417 } 00418 if (!ret) 00419 { 00420 tcpEV << "Not Acceptable segment. seqNo:" << seqNo << ", len:" << len << ", rcv_nxt:" << state->rcv_nxt << ", rcv_wnd:" << state->rcv_wnd << endl; 00421 } 00422 return ret; 00423 } 00424 00425 void TCPConnection::sendSyn() 00426 { 00427 if (remoteAddr.isUnspecified() || remotePort==-1) 00428 opp_error("Error processing command OPEN_ACTIVE: foreign socket unspecified"); 00429 if (localPort==-1) 00430 opp_error("Error processing command OPEN_ACTIVE: local port unspecified"); 00431 00432 // create segment 00433 TCPSegment *tcpseg = createTCPSegment("SYN"); 00434 tcpseg->setSequenceNo(state->iss); 00435 tcpseg->setSynBit(true); 00436 updateRcvWnd(); 00437 tcpseg->setWindow(state->rcv_wnd); 00438 00439 state->snd_max = state->snd_nxt = state->iss+1; 00440 00441 // write header options 00442 writeHeaderOptions(tcpseg); 00443 00444 // send it 00445 sendToIP(tcpseg); 00446 } 00447 00448 void TCPConnection::sendSynAck() 00449 { 00450 // create segment 00451 TCPSegment *tcpseg = createTCPSegment("SYN+ACK"); 00452 tcpseg->setSequenceNo(state->iss); 00453 tcpseg->setAckNo(state->rcv_nxt); 00454 tcpseg->setSynBit(true); 00455 tcpseg->setAckBit(true); 00456 updateRcvWnd(); 00457 tcpseg->setWindow(state->rcv_wnd); 00458 00459 state->snd_max = state->snd_nxt = state->iss+1; 00460 00461 // write header options 00462 writeHeaderOptions(tcpseg); 00463 00464 // send it 00465 sendToIP(tcpseg); 00466 00467 // notify 00468 tcpAlgorithm->ackSent(); 00469 } 00470 00471 void TCPConnection::sendRst(uint32 seqNo) 00472 { 00473 sendRst(seqNo, localAddr, remoteAddr, localPort, remotePort); 00474 } 00475 00476 void TCPConnection::sendRst(uint32 seq, IPvXAddress src, IPvXAddress dest, int srcPort, int destPort) 00477 { 00478 TCPSegment *tcpseg = createTCPSegment("RST"); 00479 00480 tcpseg->setSrcPort(srcPort); 00481 tcpseg->setDestPort(destPort); 00482 00483 tcpseg->setRstBit(true); 00484 tcpseg->setSequenceNo(seq); 00485 00486 // send it 00487 sendToIP(tcpseg, src, dest); 00488 } 00489 00490 void TCPConnection::sendRstAck(uint32 seq, uint32 ack, IPvXAddress src, IPvXAddress dest, int srcPort, int destPort) 00491 { 00492 TCPSegment *tcpseg = createTCPSegment("RST+ACK"); 00493 00494 tcpseg->setSrcPort(srcPort); 00495 tcpseg->setDestPort(destPort); 00496 00497 tcpseg->setRstBit(true); 00498 tcpseg->setAckBit(true); 00499 tcpseg->setSequenceNo(seq); 00500 tcpseg->setAckNo(ack); 00501 00502 // send it 00503 sendToIP(tcpseg, src, dest); 00504 00505 // notify 00506 tcpAlgorithm->ackSent(); 00507 } 00508 00509 void TCPConnection::sendAck() 00510 { 00511 TCPSegment *tcpseg = createTCPSegment("ACK"); 00512 00513 tcpseg->setAckBit(true); 00514 tcpseg->setSequenceNo(state->snd_nxt); 00515 tcpseg->setAckNo(state->rcv_nxt); 00516 tcpseg->setWindow(updateRcvWnd()); 00517 00518 // write header options 00519 writeHeaderOptions(tcpseg); 00520 00521 // send it 00522 sendToIP(tcpseg); 00523 00524 // notify 00525 tcpAlgorithm->ackSent(); 00526 } 00527 00528 void TCPConnection::sendFin() 00529 { 00530 TCPSegment *tcpseg = createTCPSegment("FIN"); 00531 00532 // Note: ACK bit *must* be set for both FIN and FIN+ACK. What makes 00533 // the difference for FIN+ACK is that its ackNo acks the remote TCP's FIN. 00534 tcpseg->setFinBit(true); 00535 tcpseg->setAckBit(true); 00536 tcpseg->setAckNo(state->rcv_nxt); 00537 tcpseg->setSequenceNo(state->snd_nxt); 00538 tcpseg->setWindow(updateRcvWnd()); 00539 00540 // send it 00541 sendToIP(tcpseg); 00542 00543 // notify 00544 tcpAlgorithm->ackSent(); 00545 } 00546 00547 void TCPConnection::sendSegment(uint32 bytes) 00548 { 00549 if (state->sack_enabled && state->afterRto) 00550 { 00551 // check rexmitQ and try to forward snd_nxt before sending new data 00552 uint32 forward = rexmitQueue->checkRexmitQueueForSackedOrRexmittedSegments(state->snd_nxt); 00553 state->snd_nxt = state->snd_nxt + forward; 00554 } 00555 00556 ulong buffered = sendQueue->getBytesAvailable(state->snd_nxt); 00557 if (bytes > buffered) // last segment? 00558 bytes = buffered; 00559 00560 // if header options will be added, this could reduce the number of data bytes allowed for this segment, 00561 // because following condition must to be respected: 00562 // bytes + options_len <= snd_mss 00563 TCPSegment *tcpseg_temp = createTCPSegment(NULL); 00564 tcpseg_temp->setAckBit(true); // needed for TS option, otherwise TSecr will be set to 0 00565 writeHeaderOptions(tcpseg_temp); 00566 uint options_len = tcpseg_temp->getHeaderLength() - TCP_HEADER_OCTETS; // TCP_HEADER_OCTETS = 20 00567 while (bytes + options_len > state->snd_mss) 00568 bytes--; 00569 state->sentBytes = bytes; 00570 00571 // send one segment of 'bytes' bytes from snd_nxt, and advance snd_nxt 00572 TCPSegment *tcpseg = sendQueue->createSegmentWithBytes(state->snd_nxt, bytes); 00573 00574 // if sack_enabled copy region of tcpseg to rexmitQueue 00575 if (state->sack_enabled) 00576 rexmitQueue->enqueueSentData(state->snd_nxt, state->snd_nxt+bytes); 00577 00578 tcpseg->setAckNo(state->rcv_nxt); 00579 tcpseg->setAckBit(true); 00580 tcpseg->setWindow(updateRcvWnd()); 00581 00582 // TBD when to set PSH bit? 00583 // TBD set URG bit if needed 00584 ASSERT(bytes==tcpseg->getPayloadLength()); 00585 00586 state->snd_nxt += bytes; 00587 00588 // check if afterRto bit can be reset 00589 if (state->afterRto && seqGE(state->snd_nxt, state->snd_max)) 00590 state->afterRto = false; 00591 00592 if (state->send_fin && state->snd_nxt==state->snd_fin_seq) 00593 { 00594 tcpEV << "Setting FIN on segment\n"; 00595 tcpseg->setFinBit(true); 00596 state->snd_nxt = state->snd_fin_seq+1; 00597 } 00598 00599 // add header options and update header length (from tcpseg_temp) 00600 tcpseg->setOptionsArraySize(tcpseg_temp->getOptionsArraySize()); 00601 for (uint i=0; i<tcpseg_temp->getOptionsArraySize(); i++) 00602 tcpseg->setOptions(i, tcpseg_temp->getOptions(i)); 00603 tcpseg->setHeaderLength(tcpseg_temp->getHeaderLength()); 00604 delete tcpseg_temp; 00605 00606 // send it 00607 sendToIP(tcpseg); 00608 } 00609 00610 bool TCPConnection::sendData(bool fullSegmentsOnly, uint32 congestionWindow) 00611 { 00612 if (!state->afterRto) 00613 { 00614 // we'll start sending from snd_max 00615 state->snd_nxt = state->snd_max; 00616 } 00617 00618 uint32 old_highRxt = 0; 00619 if (state->sack_enabled) 00620 old_highRxt = rexmitQueue->getHighestRexmittedSeqNum(); 00621 00622 // check how many bytes we have 00623 ulong buffered = sendQueue->getBytesAvailable(state->snd_nxt); 00624 if (buffered==0) 00625 return false; 00626 00627 // maxWindow is minimum of snd_wnd and congestionWindow (snd_cwnd) 00628 ulong maxWindow = std::min(state->snd_wnd, congestionWindow); 00629 00630 // effectiveWindow: number of bytes we're allowed to send now 00631 long effectiveWin = maxWindow - (state->snd_nxt - state->snd_una); 00632 if (effectiveWin <= 0) 00633 { 00634 tcpEV << "Effective window is zero (advertised window " << state->snd_wnd << 00635 ", congestion window " << congestionWindow << "), cannot send.\n"; 00636 return false; 00637 } 00638 00639 ulong bytesToSend = effectiveWin; 00640 00641 if (bytesToSend > buffered) 00642 bytesToSend = buffered; 00643 00644 uint32 effectiveMaxBytesSend = state->snd_mss; 00645 if (state->ts_enabled) 00646 effectiveMaxBytesSend -= TCP_OPTION_TS_SIZE; 00647 00648 // last segment could be less than state->snd_mss (or less than snd_mss-TCP_OPTION_TS_SIZE is using TS option) 00649 if (fullSegmentsOnly && buffered > (ulong)effectiveWin && 00650 (bytesToSend < (effectiveMaxBytesSend))) 00651 { 00652 tcpEV << "Cannot send, not enough data for a full segment (SMSS=" << state->snd_mss 00653 << ", in buffer " << buffered << ")\n"; 00654 return false; 00655 } 00656 00657 // start sending 'bytesToSend' bytes 00658 tcpEV << "Will send " << bytesToSend << " bytes (effectiveWindow " << effectiveWin 00659 << ", in buffer " << buffered << " bytes)\n"; 00660 00661 uint32 old_snd_nxt = state->snd_nxt; 00662 ASSERT(bytesToSend>0); 00663 00664 #ifdef TCP_SENDFRAGMENTS /* normally undefined */ 00665 // make agressive use of the window until the last byte 00666 while (bytesToSend>0) 00667 { 00668 ulong bytes = std::min(bytesToSend, state->snd_mss); 00669 sendSegment(bytes); 00670 bytesToSend -= state->sentBytes; 00671 } 00672 #else 00673 // send <MSS segments only if it's the only segment we can send now - Note: If bytesToSend=1010, MSS=1012, ts_enabled=true => we may send 2 segments (1000 payload + optionsHeader and 10 payload + optionsHeader) 00674 // FIXME this should probably obey Nagle's alg -- to be checked 00675 if (bytesToSend <= state->snd_mss) 00676 { 00677 sendSegment(bytesToSend); 00678 bytesToSend -= state->sentBytes; 00679 } 00680 else // send whole segments only (nagle_enabled) 00681 { 00682 while (bytesToSend >= effectiveMaxBytesSend) 00683 { 00684 sendSegment(state->snd_mss); 00685 bytesToSend -= state->sentBytes; 00686 } 00687 } 00688 // check how many bytes we have - last segment could be less than state->snd_mss 00689 buffered = sendQueue->getBytesAvailable(state->snd_nxt); 00690 if (bytesToSend==buffered && buffered!=0) // last segment? 00691 sendSegment(bytesToSend); 00692 else if (bytesToSend>0) 00693 tcpEV << bytesToSend << " bytes of space left in effectiveWindow\n"; 00694 #endif 00695 00696 // remember highest seq sent (snd_nxt may be set back on retransmission, 00697 // but we'll need snd_max to check validity of ACKs -- they must ack 00698 // something we really sent) 00699 if (seqGreater(state->snd_nxt, state->snd_max)) 00700 state->snd_max = state->snd_nxt; 00701 if (unackedVector) unackedVector->record(state->snd_max - state->snd_una); 00702 00703 // notify (once is enough) 00704 tcpAlgorithm->ackSent(); 00705 if (state->sack_enabled && state->lossRecovery && old_highRxt != state->highRxt) 00706 { 00707 // Note: Restart of REXMIT timer on retransmission is not part of RFC 2581, however optional in RFC 3517 if sent during recovery. 00708 tcpEV << "Retransmission sent during recovery, restarting REXMIT timer.\n"; 00709 tcpAlgorithm->restartRexmitTimer(); 00710 } 00711 else // don't measure RTT for retransmitted packets 00712 tcpAlgorithm->dataSent(old_snd_nxt); 00713 00714 return true; 00715 } 00716 00717 bool TCPConnection::sendProbe() 00718 { 00719 // we'll start sending from snd_max 00720 state->snd_nxt = state->snd_max; 00721 00722 // check we have 1 byte to send 00723 if (sendQueue->getBytesAvailable(state->snd_nxt)==0) 00724 { 00725 tcpEV << "Cannot send probe because send buffer is empty\n"; 00726 return false; 00727 } 00728 00729 uint32 old_snd_nxt = state->snd_nxt; 00730 00731 tcpEV << "Sending 1 byte as probe, with seq=" << state->snd_nxt << "\n"; 00732 sendSegment(1); 00733 00734 // remember highest seq sent (snd_nxt may be set back on retransmission, 00735 // but we'll need snd_max to check validity of ACKs -- they must ack 00736 // something we really sent) 00737 state->snd_max = state->snd_nxt; 00738 if (unackedVector) unackedVector->record(state->snd_max - state->snd_una); 00739 00740 // notify 00741 tcpAlgorithm->ackSent(); 00742 tcpAlgorithm->dataSent(old_snd_nxt); 00743 00744 return true; 00745 } 00746 00747 void TCPConnection::retransmitOneSegment(bool called_at_rto) 00748 { 00749 uint32 old_snd_nxt = state->snd_nxt; 00750 00751 // retransmit one segment at snd_una, and set snd_nxt accordingly (if not called at RTO) 00752 state->snd_nxt = state->snd_una; 00753 00754 // When FIN sent the snd_max-snd_nxt larger than bytes available in queue 00755 ulong bytes = std::min((ulong)std::min(state->snd_mss, state->snd_max - state->snd_nxt), 00756 sendQueue->getBytesAvailable(state->snd_nxt)); 00757 00758 // FIN (without user data) needs to be resent 00759 if (bytes == 0 && state->send_fin && state->snd_fin_seq == sendQueue->getBufferEndSeq()) 00760 { 00761 state->snd_max = sendQueue->getBufferEndSeq(); 00762 tcpEV << "No outstanding DATA, resending FIN, advancing snd_nxt over the FIN\n"; 00763 state->snd_nxt = state->snd_max; 00764 sendFin(); 00765 state->snd_max = ++state->snd_nxt; 00766 00767 if (unackedVector) 00768 unackedVector->record(state->snd_max - state->snd_una); 00769 } 00770 else 00771 { 00772 ASSERT(bytes != 0); 00773 00774 sendSegment(bytes); 00775 00776 if (!called_at_rto) 00777 { 00778 if (seqGreater(old_snd_nxt, state->snd_nxt)) 00779 state->snd_nxt = old_snd_nxt; 00780 } 00781 00782 // notify 00783 tcpAlgorithm->ackSent(); 00784 00785 if (state->sack_enabled) 00786 { 00787 // RFC 3517, page 7: "(3) Retransmit the first data segment presumed dropped -- the segment 00788 // starting with sequence number HighACK + 1. To prevent repeated 00789 // retransmission of the same data, set HighRxt to the highest 00790 // sequence number in the retransmitted segment." 00791 state->highRxt = rexmitQueue->getHighestRexmittedSeqNum(); 00792 } 00793 } 00794 } 00795 00796 void TCPConnection::retransmitData() 00797 { 00798 // retransmit everything from snd_una 00799 state->snd_nxt = state->snd_una; 00800 00801 uint32 bytesToSend = state->snd_max - state->snd_nxt; 00802 ASSERT(bytesToSend!=0); 00803 00804 // TBD - avoid to send more than allowed - check cwnd and rwnd before retransmitting data! 00805 while (bytesToSend>0) 00806 { 00807 uint32 bytes = std::min(bytesToSend, state->snd_mss); 00808 bytes = std::min(bytes, (uint32)(sendQueue->getBytesAvailable(state->snd_nxt))); 00809 sendSegment(bytes); 00810 // Do not send packets after the FIN. 00811 // fixes bug that occurs in examples/inet/bulktransfer at event #64043 T=13.861159213744 00812 if (state->send_fin && state->snd_nxt==state->snd_fin_seq+1) 00813 break; 00814 bytesToSend -= state->sentBytes; 00815 } 00816 } 00817 00818 void TCPConnection::readHeaderOptions(TCPSegment *tcpseg) 00819 { 00820 tcpEV << "TCP Header Option(s) received:\n"; 00821 00822 for (uint i=0; i<tcpseg->getOptionsArraySize(); i++) 00823 { 00824 const TCPOption& option = tcpseg->getOptions(i); 00825 short kind = option.getKind(); 00826 short length = option.getLength(); 00827 tcpEV << "Option type " << kind << " (" << optionName(kind) << "), length " << length << "\n"; 00828 bool ok = true; 00829 switch(kind) 00830 { 00831 case TCPOPTION_END_OF_OPTION_LIST: // EOL=0 00832 case TCPOPTION_NO_OPERATION: // NOP=1 00833 if (length != 1) 00834 { 00835 tcpEV << "ERROR: option length incorrect\n"; 00836 ok = false; 00837 } 00838 break; 00839 case TCPOPTION_MAXIMUM_SEGMENT_SIZE: // MSS=2 00840 ok = processMSSOption(tcpseg, option); 00841 break; 00842 case TCPOPTION_WINDOW_SCALE: // WS=3 00843 ok = processWSOption(tcpseg, option); 00844 break; 00845 case TCPOPTION_SACK_PERMITTED: // SACK_PERMITTED=4 00846 ok = processSACKPermittedOption(tcpseg, option); 00847 break; 00848 case TCPOPTION_SACK: // SACK=5 00849 ok = processSACKOption(tcpseg, option); 00850 break; 00851 case TCPOPTION_TIMESTAMP: // TS=8 00852 ok = processTSOption(tcpseg, option); 00853 break; 00854 // TODO add new TCPOptions here once they are implemented 00855 // TODO delegate to TCPAlgorithm as well -- it may want to recognized additional options 00856 default: 00857 tcpEV << "ERROR: Unsupported TCP option kind " << kind << "\n"; 00858 break; 00859 } 00860 (void)ok; // unused 00861 } 00862 } 00863 00864 bool TCPConnection::processMSSOption(TCPSegment *tcpseg, const TCPOption& option) 00865 { 00866 if (option.getLength() != 4) 00867 { 00868 tcpEV << "ERROR: option length incorrect\n"; 00869 return false; 00870 } 00871 00872 if (fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT) 00873 { 00874 tcpEV << "ERROR: TCP Header Option MSS received, but in unexpected state\n"; 00875 return false; 00876 } 00877 00878 if (option.getValuesArraySize() == 0) 00879 { 00880 // since option.getLength() was already checked, this is a programming error not a TCP error 00881 throw cRuntimeError("TCPOption for MSS does not contain the data its getLength() promises"); 00882 } 00883 00884 // RFC 2581, page 1: 00885 // "The SMSS is the size of the largest segment that the sender can transmit. 00886 // This value can be based on the maximum transmission unit of the network, 00887 // the path MTU discovery [MD90] algorithm, RMSS (see next item), or other 00888 // factors. The size does not include the TCP/IP headers and options." 00889 // 00890 // "The RMSS is the size of the largest segment the receiver is willing to accept. 00891 // This is the value specified in the MSS option sent by the receiver during 00892 // connection startup. Or, if the MSS option is not used, 536 bytes [Bra89]. 00893 // The size does not include the TCP/IP headers and options." 00894 // 00895 // 00896 // The value of snd_mss (SMSS) is set to the minimum of snd_mss (local parameter) and 00897 // the value specified in the MSS option received during connection startup. 00898 state->snd_mss = std::min(state->snd_mss, (uint32) option.getValues(0)); 00899 if (state->snd_mss==0) 00900 state->snd_mss = 536; 00901 tcpEV << "TCP Header Option MSS(=" << option.getValues(0) << ") received, SMSS is set to: " << state->snd_mss << "\n"; 00902 return true; 00903 } 00904 00905 bool TCPConnection::processWSOption(TCPSegment *tcpseg, const TCPOption& option) 00906 { 00907 if (option.getLength() != 3) 00908 { 00909 tcpEV << "ERROR: length incorrect\n"; 00910 return false; 00911 } 00912 00913 if (fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT) 00914 { 00915 tcpEV << "ERROR: TCP Header Option WS received, but in unexpected state\n"; 00916 return false; 00917 } 00918 00919 if (option.getValuesArraySize() == 0) 00920 { 00921 // since option.getLength() was already checked, this is a programming error not a TCP error 00922 throw cRuntimeError("TCPOption for WS does not contain the data its getLength() promises"); 00923 } 00924 00925 state->rcv_ws = true; 00926 state->ws_enabled = state->ws_support && state->snd_ws && state->rcv_ws; 00927 state->snd_wnd_scale = option.getValues(0); 00928 tcpEV << "TCP Header Option WS(=" << state->snd_wnd_scale << ") received, WS (ws_enabled) is set to: " << state->ws_enabled << "\n"; 00929 if (state->snd_wnd_scale > 14) // RFC 1323, page 11: "the shift count must be limited to 14" 00930 { 00931 tcpEV << "ERROR: TCP Header Option WS received but shift count value is exceeding 14\n"; 00932 state->snd_wnd_scale = 14; 00933 } 00934 return true; 00935 } 00936 00937 bool TCPConnection::processTSOption(TCPSegment *tcpseg, const TCPOption& option) 00938 { 00939 if (option.getLength() != 10) 00940 { 00941 tcpEV << "ERROR: length incorrect\n"; 00942 return false; 00943 } 00944 00945 if ((!state->ts_enabled && fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT) || 00946 (state->ts_enabled && fsm.getState() != TCP_S_SYN_RCVD && fsm.getState() != TCP_S_ESTABLISHED && 00947 fsm.getState() != TCP_S_FIN_WAIT_1 && fsm.getState() != TCP_S_FIN_WAIT_2)) 00948 { 00949 tcpEV << "ERROR: TCP Header Option TS received, but in unexpected state\n"; 00950 return false; 00951 } 00952 00953 if (option.getValuesArraySize() != 2) 00954 { 00955 // since option.getLength() was already checked, this is a programming error not a TCP error 00956 throw cRuntimeError("TCPOption for TS does not contain the data its getLength() promises"); 00957 } 00958 00959 if (!state->ts_enabled) 00960 { 00961 state->rcv_initial_ts = true; 00962 state->ts_enabled = state->ts_support && state->snd_initial_ts && state->rcv_initial_ts; 00963 tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") received, TS (ts_enabled) is set to: " << state->ts_enabled << "\n"; 00964 } 00965 else 00966 tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") received\n"; 00967 00968 // RFC 1323, page 35: 00969 // "Check whether the segment contains a Timestamps option and bit 00970 // Snd.TS.OK is on. If so: 00971 // If SEG.TSval < TS.Recent, then test whether connection has 00972 // been idle less than 24 days; if both are true, then the 00973 // segment is not acceptable; follow steps below for an 00974 // unacceptable segment. 00975 // If SEG.SEQ is equal to Last.ACK.sent, then save SEG.[TSval] in 00976 // variable TS.Recent." 00977 if (state->ts_enabled) 00978 { 00979 if (seqLess(option.getValues(0), state->ts_recent)) 00980 { 00981 if ((simTime() - state->time_last_data_sent) > PAWS_IDLE_TIME_THRESH) // PAWS_IDLE_TIME_THRESH = 24 days 00982 { 00983 tcpEV << "PAWS: Segment is not acceptable, TSval=" << option.getValues(0) << " in " << stateName(fsm.getState()) << " state received: dropping segment\n"; 00984 return false; 00985 } 00986 } 00987 else if (seqLE(tcpseg->getSequenceNo(), state->last_ack_sent)) // Note: test is modified according to the latest proposal of the tcplw@cray.com list (Braden 1993/04/26) 00988 { 00989 state->ts_recent = option.getValues(0); 00990 tcpEV << "Updating ts_recent from segment: new ts_recent=" << state->ts_recent << "\n"; 00991 } 00992 } 00993 00994 return true; 00995 } 00996 00997 bool TCPConnection::processSACKPermittedOption(TCPSegment *tcpseg, const TCPOption& option) 00998 { 00999 if (option.getLength() != 2) 01000 { 01001 tcpEV << "ERROR: length incorrect\n"; 01002 return false; 01003 } 01004 01005 if (fsm.getState() != TCP_S_LISTEN && fsm.getState() != TCP_S_SYN_SENT) 01006 { 01007 tcpEV << "ERROR: TCP Header Option SACK_PERMITTED received, but in unexpected state\n"; 01008 return false; 01009 } 01010 01011 state->rcv_sack_perm = true; 01012 state->sack_enabled = state->sack_support && state->snd_sack_perm && state->rcv_sack_perm; 01013 tcpEV << "TCP Header Option SACK_PERMITTED received, SACK (sack_enabled) is set to: " << state->sack_enabled << "\n"; 01014 return true; 01015 } 01016 01017 bool TCPConnection::processSACKOption(TCPSegment *tcpseg, const TCPOption& option) 01018 { 01019 if (option.getLength() % 8 != 2) 01020 { 01021 tcpEV << "ERROR: option length incorrect\n"; 01022 return false; 01023 } 01024 01025 if (state->sack_enabled && fsm.getState() != TCP_S_SYN_RCVD && fsm.getState() != TCP_S_ESTABLISHED && fsm.getState() != TCP_S_FIN_WAIT_1 && fsm.getState() != TCP_S_FIN_WAIT_2) 01026 { 01027 tcpEV << "ERROR: TCP Header Option SACK received, but in unexpected state\n"; 01028 return false; 01029 } 01030 01031 if (!state->sack_enabled) 01032 { 01033 tcpEV << "ERROR: " << (option.getLength()/2) << ". SACK(s) received, but sack_enabled is set to " << state->sack_enabled << "\n"; 01034 return false; 01035 } 01036 01037 uint n = option.getValuesArraySize()/2; 01038 if (n > 0) // sacks present? 01039 { 01040 tcpEV << n << " SACK(s) received:\n"; 01041 uint count=0; 01042 for (uint i=0; i<n; i++) 01043 { 01044 Sack tmp; 01045 tmp.setStart(option.getValues(count)); 01046 count++; 01047 tmp.setEnd(option.getValues(count)); 01048 count++; 01049 01050 tcpEV << (i+1) << ". SACK:" << " [" << tmp.getStart() << ".." << tmp.getEnd() << ")\n"; 01051 01052 // check for D-SACK 01053 if (i==0 && seqLess(tmp.getEnd(), tcpseg->getAckNo())) 01054 { 01055 // RFC 2883, page 8: 01056 // "In order for the sender to check that the first (D)SACK block of an 01057 // acknowledgement in fact acknowledges duplicate data, the sender 01058 // should compare the sequence space in the first SACK block to the 01059 // cumulative ACK which is carried IN THE SAME PACKET. If the SACK 01060 // sequence space is less than this cumulative ACK, it is an indication 01061 // that the segment identified by the SACK block has been received more 01062 // than once by the receiver. An implementation MUST NOT compare the 01063 // sequence space in the SACK block to the TCP state variable snd.una 01064 // (which carries the total cumulative ACK), as this may result in the 01065 // wrong conclusion if ACK packets are reordered." 01066 tcpEV << "Received D-SACK below cumulative ACK=" << tcpseg->getAckNo() << " D-SACK:" << " [" << tmp.getStart() << ".." << tmp.getEnd() << ")\n"; 01067 } 01068 else if (i==0 && seqGE(tmp.getEnd(), tcpseg->getAckNo()) && n>1) 01069 { 01070 // RFC 2883, page 8: 01071 // "If the sequence space in the first SACK block is greater than the 01072 // cumulative ACK, then the sender next compares the sequence space in 01073 // the first SACK block with the sequence space in the second SACK 01074 // block, if there is one. This comparison can determine if the first 01075 // SACK block is reporting duplicate data that lies above the cumulative 01076 // ACK." 01077 Sack tmp2; 01078 tmp2.setStart(option.getValues(2)); 01079 tmp2.setEnd(option.getValues(3)); 01080 01081 if (seqGE(tmp.getStart(), tmp2.getStart()) && seqLE(tmp.getEnd(), tmp2.getEnd())) 01082 {tcpEV << "Received D-SACK above cumulative ACK=" << tcpseg->getAckNo() << " D-SACK:" << " [" << tmp.getStart() << ".." << tmp.getEnd() << ") SACK:" << " [" << tmp2.getStart() << ".." << tmp2.getEnd() << ")\n";} 01083 } 01084 01085 if (seqGreater(tmp.getEnd(), tcpseg->getAckNo())) 01086 rexmitQueue->setSackedBit(tmp.getStart(), tmp.getEnd()); 01087 } 01088 state->rcv_sacks = state->rcv_sacks + n; // total counter, no current number 01089 if (rcvSacksVector) 01090 rcvSacksVector->record(state->rcv_sacks); 01091 01092 // update scoreboard 01093 state->sackedBytes_old = state->sackedBytes; // needed for RFC 3042 to check if last dupAck contained new sack information 01094 state->sackedBytes = rexmitQueue->getTotalAmountOfSackedBytes(); 01095 if (sackedBytesVector) 01096 sackedBytesVector->record(state->sackedBytes); 01097 } 01098 return true; 01099 } 01100 01101 TCPSegment TCPConnection::writeHeaderOptions(TCPSegment *tcpseg) 01102 { 01103 TCPOption option; 01104 uint t = 0; 01105 01106 if (tcpseg->getSynBit() && (fsm.getState() == TCP_S_INIT || fsm.getState() == TCP_S_LISTEN || ((fsm.getState()==TCP_S_SYN_SENT || fsm.getState()==TCP_S_SYN_RCVD) && state->syn_rexmit_count>0))) // SYN flag set and connetion in INIT or LISTEN state (or after synRexmit timeout) 01107 { 01108 // MSS header option 01109 if (state->snd_mss > 0) 01110 { 01111 option.setKind(TCPOPTION_MAXIMUM_SEGMENT_SIZE); // MSS 01112 option.setLength(4); 01113 option.setValuesArraySize(1); 01114 01115 // Update MSS 01116 option.setValues(0,state->snd_mss); 01117 tcpEV << "TCP Header Option MSS(=" << state->snd_mss << ") sent\n"; 01118 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01119 tcpseg->setOptions(t,option); 01120 t++; 01121 } 01122 01123 // WS header option 01124 if (state->ws_support && (state->rcv_ws || (fsm.getState() == TCP_S_INIT || (fsm.getState()==TCP_S_SYN_SENT && state->syn_rexmit_count>0)))) // Is WS supported by host? 01125 { 01126 // 1 padding byte 01127 option.setKind(TCPOPTION_NO_OPERATION); // NOP 01128 option.setLength(1); 01129 option.setValuesArraySize(0); 01130 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01131 tcpseg->setOptions(t,option); 01132 t++; 01133 01134 option.setKind(TCPOPTION_WINDOW_SCALE); 01135 option.setLength(3); 01136 option.setValuesArraySize(1); 01137 01138 // Update WS variables 01139 ulong scaled_rcv_wnd = receiveQueue->getAmountOfFreeBytes(state->maxRcvBuffer); 01140 state->rcv_wnd_scale = 0; 01141 while (scaled_rcv_wnd > TCP_MAX_WIN && state->rcv_wnd_scale < 14) // RFC 1323, page 11: "the shift count must be limited to 14" 01142 { 01143 scaled_rcv_wnd = scaled_rcv_wnd >> 1; 01144 state->rcv_wnd_scale++; 01145 } 01146 option.setValues(0,state->rcv_wnd_scale); // rcv_wnd_scale is also set in scaleRcvWnd() 01147 state->snd_ws = true; 01148 state->ws_enabled = state->ws_support && state->snd_ws && state->rcv_ws; 01149 tcpEV << "TCP Header Option WS(=" << option.getValues(0) << ") sent, WS (ws_enabled) is set to: " << state->ws_enabled << "\n"; 01150 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01151 tcpseg->setOptions(t,option); 01152 t++; 01153 } 01154 01155 // SACK_PERMITTED header option 01156 if (state->sack_support && (state->rcv_sack_perm || (fsm.getState() == TCP_S_INIT || (fsm.getState()==TCP_S_SYN_SENT && state->syn_rexmit_count>0)))) // Is SACK supported by host? 01157 { 01158 if (!state->ts_support) // if TS is supported by host, do not add NOPs to this segment 01159 { 01160 // 2 padding bytes 01161 option.setKind(TCPOPTION_NO_OPERATION); // NOP 01162 option.setLength(1); 01163 option.setValuesArraySize(0); 01164 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2); 01165 tcpseg->setOptions(t,option); 01166 t++; 01167 tcpseg->setOptions(t,option); 01168 t++; 01169 } 01170 01171 option.setKind(TCPOPTION_SACK_PERMITTED); 01172 option.setLength(2); 01173 option.setValuesArraySize(0); 01174 01175 // Update SACK variables 01176 state->snd_sack_perm = true; 01177 state->sack_enabled = state->sack_support && state->snd_sack_perm && state->rcv_sack_perm; 01178 tcpEV << "TCP Header Option SACK_PERMITTED sent, SACK (sack_enabled) is set to: " << state->sack_enabled << "\n"; 01179 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01180 tcpseg->setOptions(t,option); 01181 t++; 01182 } 01183 01184 // TS header option 01185 if (state->ts_support && (state->rcv_initial_ts || (fsm.getState() == TCP_S_INIT || (fsm.getState()==TCP_S_SYN_SENT && state->syn_rexmit_count>0)))) // Is TS supported by host? 01186 { 01187 if (!state->sack_support) // if SACK is supported by host, do not add NOPs to this segment 01188 { 01189 // 2 padding bytes 01190 option.setKind(TCPOPTION_NO_OPERATION); // NOP 01191 option.setLength(1); 01192 option.setValuesArraySize(0); 01193 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2); 01194 tcpseg->setOptions(t,option); 01195 t++; 01196 tcpseg->setOptions(t,option); 01197 t++; 01198 } 01199 01200 option.setKind(TCPOPTION_TIMESTAMP); 01201 option.setLength(10); 01202 option.setValuesArraySize(2); 01203 01204 // Update TS variables 01205 // RFC 1323, page 13: "The Timestamp Value field (TSval) contains the current value of the timestamp clock of the TCP sending the option." 01206 option.setValues(0,convertSimtimeToTS(simTime())); 01207 // RFC 1323, page 16: "(3) When a TSopt is sent, its TSecr field is set to the current TS.Recent value." 01208 // RFC 1323, page 13: 01209 // "The Timestamp Echo Reply field (TSecr) is only valid if the ACK 01210 // bit is set in the TCP header; if it is valid, it echos a times- 01211 // tamp value that was sent by the remote TCP in the TSval field 01212 // of a Timestamps option. When TSecr is not valid, its value 01213 // must be zero." 01214 if (tcpseg->getAckBit()) 01215 option.setValues(1,state->ts_recent); 01216 else 01217 option.setValues(1,0); 01218 state->snd_initial_ts = true; 01219 state->ts_enabled = state->ts_support && state->snd_initial_ts && state->rcv_initial_ts; 01220 tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") sent, TS (ts_enabled) is set to: " << state->ts_enabled << "\n"; 01221 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01222 tcpseg->setOptions(t,option); 01223 t++; 01224 } 01225 01226 // TODO add new TCPOptions here once they are implemented 01227 } 01228 else if (fsm.getState()==TCP_S_SYN_SENT || fsm.getState()==TCP_S_SYN_RCVD || fsm.getState()==TCP_S_ESTABLISHED || fsm.getState()==TCP_S_FIN_WAIT_1 || fsm.getState()==TCP_S_FIN_WAIT_2) // connetion is not in INIT or LISTEN state 01229 { 01230 // TS header option 01231 if (state->ts_enabled) // Is TS enabled? 01232 { 01233 if (!(state->sack_enabled && (state->snd_sack || state->snd_dsack))) // if SACK is enabled and SACKs need to be added, do not add NOPs to this segment 01234 { 01235 // 2 padding bytes 01236 option.setKind(TCPOPTION_NO_OPERATION); // NOP 01237 option.setLength(1); 01238 option.setValuesArraySize(0); 01239 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2); 01240 tcpseg->setOptions(t,option); 01241 t++; 01242 tcpseg->setOptions(t,option); 01243 t++; 01244 } 01245 01246 option.setKind(TCPOPTION_TIMESTAMP); 01247 option.setLength(10); 01248 option.setValuesArraySize(2); 01249 01250 // Update TS variables 01251 // RFC 1323, page 13: "The Timestamp Value field (TSval) contains the current value of the timestamp clock of the TCP sending the option." 01252 option.setValues(0,convertSimtimeToTS(simTime())); 01253 // RFC 1323, page 16: "(3) When a TSopt is sent, its TSecr field is set to the current TS.Recent value." 01254 // RFC 1323, page 13: 01255 // "The Timestamp Echo Reply field (TSecr) is only valid if the ACK 01256 // bit is set in the TCP header; if it is valid, it echos a times- 01257 // tamp value that was sent by the remote TCP in the TSval field 01258 // of a Timestamps option. When TSecr is not valid, its value 01259 // must be zero." 01260 if (tcpseg->getAckBit()) 01261 option.setValues(1,state->ts_recent); 01262 else 01263 option.setValues(1,0); 01264 tcpEV << "TCP Header Option TS(TSval=" << option.getValues(0) << ", TSecr=" << option.getValues(1) << ") sent\n"; 01265 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01266 tcpseg->setOptions(t,option); 01267 t++; 01268 } 01269 01270 // SACK header option 01271 01272 // RFC 2018, page 4: 01273 // "If sent at all, SACK options SHOULD be included in all ACKs which do 01274 // not ACK the highest sequence number in the data receiver's queue. In 01275 // this situation the network has lost or mis-ordered data, such that 01276 // the receiver holds non-contiguous data in its queue. RFC 1122, 01277 // Section 4.2.2.21, discusses the reasons for the receiver to send ACKs 01278 // in response to additional segments received in this state. The 01279 // receiver SHOULD send an ACK for every valid segment that arrives 01280 // containing new data, and each of these "duplicate" ACKs SHOULD bear a 01281 // SACK option." 01282 if (state->sack_enabled && (state->snd_sack || state->snd_dsack)) 01283 { 01284 if (!state->ts_enabled) // if TS is enabled, do not add NOPs to this segment 01285 { 01286 // 2 padding bytes 01287 option.setKind(TCPOPTION_NO_OPERATION); // NOP 01288 option.setLength(1); 01289 option.setValuesArraySize(0); 01290 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+2); 01291 tcpseg->setOptions(t,option); 01292 t++; 01293 tcpseg->setOptions(t,option); 01294 t++; 01295 } 01296 01297 addSacks(tcpseg); 01298 t++; 01299 } 01300 01301 // TODO add new TCPOptions here once they are implemented 01302 01303 // TODO delegate to TCPAlgorithm as well -- it may want to append additional options 01304 } 01305 01306 if (tcpseg->getOptionsArraySize() != 0) 01307 { 01308 uint options_len = 0; 01309 for (uint i=0; i<tcpseg->getOptionsArraySize(); i++) 01310 options_len = options_len + tcpseg->getOptions(i).getLength(); 01311 01312 if (options_len <= 40) // Options length allowed? - maximum: 40 Bytes 01313 tcpseg->setHeaderLength(TCP_HEADER_OCTETS+options_len); // TCP_HEADER_OCTETS = 20 01314 else 01315 { 01316 tcpseg->setHeaderLength(TCP_HEADER_OCTETS); // TCP_HEADER_OCTETS = 20 01317 tcpseg->setOptionsArraySize(0); // drop all options 01318 tcpEV << "ERROR: Options length exceeded! Segment will be sent without options" << "\n"; 01319 } 01320 } 01321 01322 return *tcpseg; 01323 } 01324 01325 TCPSegment TCPConnection::addSacks(TCPSegment *tcpseg) 01326 { 01327 TCPOption option; 01328 uint options_len = 0; 01329 uint used_options_len = 0; 01330 uint m = 0; // number of sack blocks to be sent in current segment 01331 uint n = 0; // number of sack blocks in sacks_array before sending current segment 01332 bool skip_sacks_array = false; // set if dsack is subsets of a bigger sack block recently reported 01333 bool overlap = false; // set if recently reported sack blocks are subsets of "sacks_array[0]" 01334 01335 uint32 start = state->start_seqno; 01336 uint32 end = state->end_seqno; 01337 01338 ASSERT(start!=0 || end!=0); 01339 01340 // delete old sacks (below rcv_nxt), delete duplicates and print previous status of sacks_array: 01341 tcpEV << "Previous status of sacks_array: \n"; 01342 for (uint a=0; a<MAX_SACK_BLOCKS; a++) // MAX_SACK_BLOCKS is set to 60 01343 { 01344 if (state->sacks_array[a].getStart()!=0 && seqLE(state->sacks_array[a].getEnd(), state->rcv_nxt)) 01345 { 01346 state->sacks_array[a].setStart(0); 01347 state->sacks_array[a].setEnd(0); 01348 } 01349 if (state->sacks_array[a].getStart()!=0 && state->sacks_array[a].getEnd()!=0) // do not print empty entries 01350 tcpEV << "\t" << (a+1) << ". SACK in sacks_array:" << " [" << state->sacks_array[a].getStart() << ".." << state->sacks_array[a].getEnd() << ")\n"; 01351 else 01352 break; 01353 } 01354 01355 for (uint a=0; a<MAX_SACK_BLOCKS-1; a++) 01356 { 01357 if (state->sacks_array[a].getStart() != 0) 01358 m++; 01359 else 01360 break; 01361 } 01362 n = m + 1; // +1 for new the new sack block 01363 01364 // 2 padding bytes are prefixed or TS option is present 01365 if (tcpseg->getOptionsArraySize()>0) 01366 { 01367 for (uint i=0; i<tcpseg->getOptionsArraySize(); i++) 01368 used_options_len = used_options_len + tcpseg->getOptions(i).getLength(); 01369 if (used_options_len>30) 01370 { 01371 tcpEV << "ERROR: Failed to addSacks - at least 10 free bytes needed for SACK - used_options_len=" << used_options_len << "\n"; 01372 //reset flags: 01373 skip_sacks_array = false; 01374 state->snd_sack = false; 01375 state->snd_dsack = false; 01376 state->start_seqno = 0; 01377 state->end_seqno = 0; 01378 return *tcpseg; 01379 } 01380 else 01381 { 01382 n = std::min (n, (((40-used_options_len)-2)/8)); 01383 option.setValuesArraySize(n*2); 01384 } 01385 } 01386 else 01387 { 01388 n = std::min (n, MAX_SACK_ENTRIES); 01389 option.setValuesArraySize(n*2); 01390 } 01391 01392 // before adding a new sack move old sacks by one to the right 01393 for (int a=(MAX_SACK_BLOCKS-1); a>=0; a--) // MAX_SACK_BLOCKS is set to 60 01394 state->sacks_array[a+1] = state->sacks_array[a]; 01395 01396 if (state->snd_dsack) // SequenceNo < rcv_nxt 01397 { 01398 // RFC 2883, page 3: 01399 // "(3) The left edge of the D-SACK block specifies the first sequence 01400 // number of the duplicate contiguous sequence, and the right edge of 01401 // the D-SACK block specifies the sequence number immediately following 01402 // the last sequence in the duplicate contiguous sequence." 01403 if (seqLess(start, state->rcv_nxt) && seqLess(state->rcv_nxt, end)) 01404 end = state->rcv_nxt; 01405 } 01406 else if (start==0 && end==0) // rcv_nxt_old != rcv_nxt 01407 { 01408 // RFC 2018, page 4: 01409 // "* The first SACK block (i.e., the one immediately following the 01410 // kind and length fields in the option) MUST specify the contiguous 01411 // block of data containing the segment which triggered this ACK, 01412 // unless that segment advanced the Acknowledgment Number field in 01413 // the header. This assures that the ACK with the SACK option 01414 // reflects the most recent change in the data receiver's buffer 01415 // queue." 01416 start = state->sacks_array[0].getStart(); 01417 end = state->sacks_array[0].getEnd(); 01418 } 01419 else // rcv_nxt_old == rcv_nxt or end <= rcv_nxt 01420 { 01421 // RFC 2018, page 4: 01422 // "* The first SACK block (i.e., the one immediately following the 01423 // kind and length fields in the option) MUST specify the contiguous 01424 // block of data containing the segment which triggered this ACK," 01425 start = receiveQueue->getLE(start); 01426 end = receiveQueue->getRE(end); 01427 } 01428 01429 state->sacks_array[0].setStart(start); 01430 state->sacks_array[0].setEnd(end); 01431 01432 // RFC 2883, page 3: 01433 // "(4) If the D-SACK block reports a duplicate contiguous sequence from 01434 // a (possibly larger) block of data in the receiver's data queue above 01435 // the cumulative acknowledgement, then the second SACK block in that 01436 // SACK option should specify that (possibly larger) block of data. 01437 // 01438 // (5) Following the SACK blocks described above for reporting duplicate 01439 // segments, additional SACK blocks can be used for reporting additional 01440 // blocks of data, as specified in RFC 2018." 01441 if (state->snd_dsack) 01442 { 01443 uint32 start_new = receiveQueue->getLE(start); 01444 uint32 end_new = receiveQueue->getRE(end); 01445 if (start_new != start || end_new != end) 01446 { 01447 skip_sacks_array = true; 01448 for (int a=(MAX_SACK_BLOCKS-1); a>=1; a--) // MAX_SACK_BLOCKS is set to 60 01449 state->sacks_array[a+1] = state->sacks_array[a]; 01450 state->sacks_array[1].setStart(start_new); // specifies larger block of data 01451 state->sacks_array[1].setEnd(end_new); // specifies larger block of data 01452 } 01453 } 01454 01455 // RFC 2018, page 4: 01456 // "* The SACK option SHOULD be filled out by repeating the most 01457 // recently reported SACK blocks (based on first SACK blocks in 01458 // previous SACK options) that are not subsets of a SACK block 01459 // already included in the SACK option being constructed." 01460 01461 // check if recently reported SACK blocks are subsets of "sacks_array[0]" 01462 for (uint a=0; a<MAX_SACK_BLOCKS-1; a++) 01463 { 01464 uint i = 1; 01465 bool matched = false; 01466 01467 if (a==0 && skip_sacks_array) 01468 a = 1; 01469 01470 if (state->sacks_array[a+i].getStart() == 0) 01471 break; 01472 01473 while ((state->sacks_array[a].getStart() == state->sacks_array[a+i].getStart() || 01474 state->sacks_array[a].getEnd() == state->sacks_array[a+i].getStart() || 01475 state->sacks_array[a].getEnd() == state->sacks_array[a+i].getEnd()) 01476 && a+i < MAX_SACK_BLOCKS && state->sacks_array[a].getStart()!=0) // MAX_SACK_BLOCKS is set to 60 01477 { 01478 matched = true; 01479 i++; 01480 overlap = true; 01481 } 01482 if (matched) 01483 state->sacks_array[a+1] = state->sacks_array[a+i]; 01484 } 01485 01486 if (!skip_sacks_array && overlap && m<4) 01487 n--; 01488 01489 option.setKind(TCPOPTION_SACK); 01490 option.setLength(8*n+2); 01491 option.setValuesArraySize(2*n); 01492 01493 // write sacks from sacks_array to options 01494 uint counter = 0; 01495 for (uint a=0; a<n; a++) 01496 { 01497 option.setValues(counter,state->sacks_array[a].getStart()); 01498 counter++; 01499 option.setValues(counter,state->sacks_array[a].getEnd()); 01500 counter++; 01501 } 01502 01503 // independent of "n" we always need 2 padding bytes (NOP) to make: (used_options_len % 4 == 0) 01504 options_len = used_options_len + 8*n + 2; // 8 bytes for each SACK (n) + 2 bytes for kind&length 01505 01506 if (options_len <= 40) // Options length allowed? - maximum: 40 Bytes 01507 { 01508 tcpseg->setOptionsArraySize(tcpseg->getOptionsArraySize()+1); 01509 tcpseg->setOptions((tcpseg->getOptionsArraySize()-1),option); 01510 01511 // update number of sent sacks 01512 state->snd_sacks = state->snd_sacks+n; 01513 if (sndSacksVector) 01514 sndSacksVector->record(state->snd_sacks); 01515 01516 uint counter = 0; 01517 tcpEV << n << " SACK(s) added to header:\n"; 01518 for (uint t=0; t<(n*2); t++) 01519 { 01520 counter++; 01521 tcpEV << counter << ". SACK:" << " [" << option.getValues(t); 01522 t++; 01523 tcpEV << ".." << option.getValues(t) << ")"; 01524 if (t==1) 01525 { 01526 if (state->snd_dsack) 01527 tcpEV << " (D-SACK)"; 01528 else if (seqLE(option.getValues(t),state->rcv_nxt)) 01529 { 01530 tcpEV << " (received segment filled out a gap)"; 01531 state->snd_dsack = true; // Note: Set snd_dsack to delete first sack from sacks_array 01532 } 01533 } 01534 tcpEV << "\n"; 01535 } 01536 } 01537 else 01538 tcpEV << "ERROR: Option length exceeded! Segment will be sent without SACK(s)" << "\n"; 01539 01540 // RFC 2883, page 3: 01541 // "(1) A D-SACK block is only used to report a duplicate contiguous 01542 // sequence of data received by the receiver in the most recent packet. 01543 // 01544 // (2) Each duplicate contiguous sequence of data received is reported 01545 // in at most one D-SACK block. (I.e., the receiver sends two identical 01546 // D-SACK blocks in subsequent packets only if the receiver receives two 01547 // duplicate segments.)// 01548 // 01549 // In case of d-sack: delete first sack (d-sack) and move old sacks by one to the left 01550 if (state->snd_dsack) 01551 { 01552 for (int a=1; a<MAX_SACK_BLOCKS; a++) // MAX_SACK_BLOCKS is set to 60 01553 state->sacks_array[a-1] = state->sacks_array[a]; 01554 01555 // delete/reset last sack to avoid duplicates 01556 state->sacks_array[MAX_SACK_BLOCKS-1].setStart(0); 01557 state->sacks_array[MAX_SACK_BLOCKS-1].setEnd(0); 01558 } 01559 01560 // reset flags: 01561 skip_sacks_array = false; 01562 state->snd_sack = false; 01563 state->snd_dsack = false; 01564 state->start_seqno = 0; 01565 state->end_seqno = 0; 01566 01567 return *tcpseg; 01568 } 01569 01570 uint32 TCPConnection::getTSval(TCPSegment *tcpseg) 01571 { 01572 for (uint i=0; i<tcpseg->getOptionsArraySize(); i++) 01573 { 01574 const TCPOption& option = tcpseg->getOptions(i); 01575 short kind = option.getKind(); 01576 if (kind == TCPOPTION_TIMESTAMP) 01577 return option.getValues(0); 01578 } 01579 return 0; 01580 } 01581 01582 uint32 TCPConnection::getTSecr(TCPSegment *tcpseg) 01583 { 01584 for (uint i=0; i<tcpseg->getOptionsArraySize(); i++) 01585 { 01586 const TCPOption& option = tcpseg->getOptions(i); 01587 short kind = option.getKind(); 01588 if (kind == TCPOPTION_TIMESTAMP) 01589 return option.getValues(1); 01590 } 01591 return 0; 01592 } 01593 01594 void TCPConnection::updateRcvQueueVars() 01595 { 01596 // update receive queue related state variables 01597 state->freeRcvBuffer = receiveQueue->getAmountOfFreeBytes(state->maxRcvBuffer); 01598 state->usedRcvBuffer = state->maxRcvBuffer - state->freeRcvBuffer; 01599 01600 // update receive queue related statistics 01601 if (tcpRcvQueueBytesVector) 01602 tcpRcvQueueBytesVector->record(state->usedRcvBuffer); 01603 01604 // tcpEV << "receiveQ: receiveQLength=" << receiveQueue->getQueueLength() << " maxRcvBuffer=" << state->maxRcvBuffer << " usedRcvBuffer=" << state->usedRcvBuffer << " freeRcvBuffer=" << state->freeRcvBuffer << "\n"; 01605 } 01606 01607 unsigned short TCPConnection::updateRcvWnd() 01608 { 01609 uint32 win = 0; 01610 01611 // update receive queue related state variables and statistics 01612 updateRcvQueueVars(); 01613 win = state->freeRcvBuffer; 01614 01615 // Following lines are based on [Stevens, W.R.: TCP/IP Illustrated, Volume 2, pages 878-879]: 01616 // Don't advertise less than one full-sized segment to avoid SWS 01617 if (win < (state->maxRcvBuffer / 4) && win < state->snd_mss) 01618 win = 0; 01619 01620 // Do not shrink window 01621 // (rcv_adv minus rcv_nxt) is the amount of space still available to the sender that was previously advertised 01622 if (win < state->rcv_adv - state->rcv_nxt) 01623 win = state->rcv_adv - state->rcv_nxt; 01624 01625 // Observe upper limit for advertised window on this connection 01626 if (win > TCP_MAX_WIN && !state->ws_enabled) // TCP_MAX_WIN = 65535 (16 bit) 01627 win = TCP_MAX_WIN; // Note: The window size is limited to a 16 bit value in the TCP header if WINDOW SCALE option (RFC 1323) is not used 01628 01629 // Note: The order of the "Do not shrink window" and "Observe upper limit" parts has been changed to the order used in FreeBSD Release 7.1 01630 01631 // update rcv_adv if needed 01632 if (win > 0 && seqGE(state->rcv_nxt + win, state->rcv_adv)) 01633 { 01634 state->rcv_adv = state->rcv_nxt + win; 01635 if (rcvAdvVector) 01636 rcvAdvVector->record(state->rcv_adv); 01637 } 01638 01639 state->rcv_wnd = win; 01640 if (rcvWndVector) 01641 rcvWndVector->record(state->rcv_wnd); 01642 01643 // scale rcv_wnd: 01644 uint32 scaled_rcv_wnd = state->rcv_wnd; 01645 state->rcv_wnd_scale = 0; 01646 if (state->ws_enabled) 01647 { 01648 while (scaled_rcv_wnd > TCP_MAX_WIN && state->rcv_wnd_scale < 14) // RFC 1323, page 11: "the shift count must be limited to 14" 01649 { 01650 scaled_rcv_wnd = scaled_rcv_wnd >> 1; 01651 state->rcv_wnd_scale++; 01652 } 01653 } 01654 ASSERT(scaled_rcv_wnd == (unsigned short)scaled_rcv_wnd); 01655 return (unsigned short) scaled_rcv_wnd; 01656 } 01657 01658 void TCPConnection::updateWndInfo(TCPSegment *tcpseg, bool doAlways) 01659 { 01660 uint32 true_window = tcpseg->getWindow(); 01661 // RFC 1323, page 10: 01662 // "The window field (SEG.WND) in the header of every incoming 01663 // segment, with the exception of SYN segments, is left-shifted 01664 // by Snd.Wind.Scale bits before updating SND.WND: 01665 // SND.WND = SEG.WND << Snd.Wind.Scale" 01666 if (state->ws_enabled && !tcpseg->getSynBit()) 01667 true_window = tcpseg->getWindow() << state->snd_wnd_scale; 01668 01669 // Following lines are based on [Stevens, W.R.: TCP/IP Illustrated, Volume 2, page 982]: 01670 if (doAlways || (tcpseg->getAckBit() 01671 && (seqLess(state->snd_wl1, tcpseg->getSequenceNo()) || 01672 (state->snd_wl1 == tcpseg->getSequenceNo() && seqLE(state->snd_wl2, tcpseg->getAckNo())) || 01673 (state->snd_wl2 == tcpseg->getAckNo() && true_window > state->snd_wnd)))) 01674 { 01675 // send window should be updated 01676 state->snd_wnd = true_window; 01677 tcpEV << "Updating send window from segment: new wnd=" << state->snd_wnd << "\n"; 01678 state->snd_wl1 = tcpseg->getSequenceNo(); 01679 state->snd_wl2 = tcpseg->getAckNo(); 01680 if (sndWndVector) 01681 sndWndVector->record(state->snd_wnd); 01682 } 01683 } 01684 01685 bool TCPConnection::isLost(uint32 seqNum) 01686 { 01687 ASSERT (state->sack_enabled); 01688 // RFC 3517, page 3: "This routine returns whether the given sequence number is 01689 // considered to be lost. The routine returns true when either 01690 // DupThresh discontiguous SACKed sequences have arrived above 01691 // 'SeqNum' or (DupThresh * SMSS) bytes with sequence numbers greater 01692 // than 'SeqNum' have been SACKed. Otherwise, the routine returns 01693 // false." 01694 bool isLost = false; 01695 01696 ASSERT(seqGE(seqNum,state->snd_una)); // HighAck = snd_una 01697 01698 if (rexmitQueue->getNumOfDiscontiguousSacks(seqNum) >= DUPTHRESH || // DUPTHRESH = 3 01699 rexmitQueue->getAmountOfSackedBytes(seqNum) >= (DUPTHRESH * state->snd_mss)) 01700 isLost = true; 01701 else 01702 isLost = false; 01703 01704 return isLost; 01705 } 01706 01707 void TCPConnection::setPipe() 01708 { 01709 ASSERT (state->sack_enabled); 01710 // RFC 3517, pages 1 and 2: " 01711 // "HighACK" is the sequence number of the highest byte of data that 01712 // has been cumulatively ACKed at a given point. 01713 // 01714 // "HighData" is the highest sequence number transmitted at a given 01715 // point. 01716 // 01717 // "HighRxt" is the highest sequence number which has been 01718 // retransmitted during the current loss recovery phase. 01719 // 01720 // "Pipe" is a sender's estimate of the number of bytes outstanding 01721 // in the network. This is used during recovery for limiting the 01722 // sender's sending rate. The pipe variable allows TCP to use a 01723 // fundamentally different congestion control than specified in 01724 // [RFC2581]. The algorithm is often referred to as the "pipe 01725 // algorithm"." 01726 // HighAck = snd_una 01727 // HighData = snd_max 01728 01729 state->highRxt = rexmitQueue->getHighestRexmittedSeqNum(); 01730 state->pipe = 0; 01731 01732 uint32 shift = state->snd_mss; 01733 if (state->ts_enabled) 01734 shift -= TCP_OPTION_TS_SIZE; 01735 01736 // RFC 3517, page 3: "This routine traverses the sequence space from HighACK to HighData 01737 // and MUST set the "pipe" variable to an estimate of the number of 01738 // octets that are currently in transit between the TCP sender and 01739 // the TCP receiver. After initializing pipe to zero the following 01740 // steps are taken for each octet 'S1' in the sequence space between 01741 // HighACK and HighData that has not been SACKed:" 01742 for (uint32 s1=state->snd_una; s1<state->snd_max; s1=s1+shift) 01743 { 01744 if (rexmitQueue->getSackedBit(s1)==false) 01745 { 01746 // RFC 3517, page 3: "(a) If IsLost (S1) returns false: 01747 // 01748 // Pipe is incremented by 1 octet. 01749 // 01750 // The effect of this condition is that pipe is incremented for 01751 // packets that have not been SACKed and have not been determined 01752 // to have been lost (i.e., those segments that are still assumed 01753 // to be in the network)." 01754 if (isLost(s1)==false) 01755 state->pipe++; 01756 01757 // RFC 3517, pages 3 and 4: "(b) If S1 <= HighRxt: 01758 // 01759 // Pipe is incremented by 1 octet. 01760 // 01761 // The effect of this condition is that pipe is incremented for 01762 // the retransmission of the octet. 01763 // 01764 // Note that octets retransmitted without being considered lost are 01765 // counted twice by the above mechanism." 01766 if (seqLE(s1,state->highRxt)) 01767 state->pipe++; 01768 } 01769 } 01770 01771 state->pipe = state->pipe * shift; 01772 if (pipeVector) 01773 pipeVector->record(state->pipe); 01774 } 01775 01776 uint32 TCPConnection::nextSeg() 01777 { 01778 ASSERT (state->sack_enabled); 01779 // RFC 3517, page 5: "This routine uses the scoreboard data structure maintained by the 01780 // Update() function to determine what to transmit based on the SACK 01781 // information that has arrived from the data receiver (and hence 01782 // been marked in the scoreboard). NextSeg () MUST return the 01783 // sequence number range of the next segment that is to be 01784 // transmitted, per the following rules:" 01785 01786 state->highRxt = rexmitQueue->getHighestRexmittedSeqNum(); 01787 uint32 seqNum = 0; 01788 bool found = false; 01789 uint32 shift = state->snd_mss; 01790 if (state->ts_enabled) 01791 shift -= TCP_OPTION_TS_SIZE; 01792 01793 // RFC 3517, page 5: "(1) If there exists a smallest unSACKed sequence number 'S2' that 01794 // meets the following three criteria for determining loss, the 01795 // sequence range of one segment of up to SMSS octets starting 01796 // with S2 MUST be returned. 01797 // 01798 // (1.a) S2 is greater than HighRxt. 01799 // 01800 // (1.b) S2 is less than the highest octet covered by any 01801 // received SACK. 01802 // 01803 // (1.c) IsLost (S2) returns true." 01804 for (uint32 s2=state->snd_una; s2<state->snd_max; s2=s2+shift) 01805 { 01806 if (rexmitQueue->getSackedBit(s2)==false) 01807 { 01808 if (seqGE(s2,state->highRxt) && 01809 seqLE(s2,(rexmitQueue->getHighestSackedSeqNum())) && 01810 isLost(s2)) 01811 { 01812 seqNum = s2; 01813 found = true; 01814 return seqNum; 01815 } 01816 } 01817 } 01818 01819 // RFC 3517, page 5: "(2) If no sequence number 'S2' per rule (1) exists but there 01820 // exists available unsent data and the receiver's advertised 01821 // window allows, the sequence range of one segment of up to SMSS 01822 // octets of previously unsent data starting with sequence number 01823 // HighData+1 MUST be returned." 01824 if (!found) 01825 { 01826 // check how many unsent bytes we have 01827 ulong buffered = sendQueue->getBytesAvailable(state->snd_max); 01828 ulong maxWindow = state->snd_wnd; 01829 // effectiveWindow: number of bytes we're allowed to send now 01830 ulong effectiveWin = maxWindow - state->pipe; 01831 if (buffered > 0 && effectiveWin >= state->snd_mss) 01832 { 01833 seqNum = state->snd_max; // HighData = snd_max 01834 found = true; 01835 return seqNum; 01836 } 01837 } 01838 01839 // RFC 3517, pages 5 and 6: "(3) If the conditions for rules (1) and (2) fail, but there exists 01840 // an unSACKed sequence number 'S3' that meets the criteria for 01841 // detecting loss given in steps (1.a) and (1.b) above 01842 // (specifically excluding step (1.c)) then one segment of up to 01843 // SMSS octets starting with S3 MAY be returned. 01844 // 01845 // Note that rule (3) is a sort of retransmission "last resort". 01846 // It allows for retransmission of sequence numbers even when the 01847 // sender has less certainty a segment has been lost than as with 01848 // rule (1). Retransmitting segments via rule (3) will help 01849 // sustain TCP's ACK clock and therefore can potentially help 01850 // avoid retransmission timeouts. However, in sending these 01851 // segments the sender has two copies of the same data considered 01852 // to be in the network (and also in the Pipe estimate). When an 01853 // ACK or SACK arrives covering this retransmitted segment, the 01854 // sender cannot be sure exactly how much data left the network 01855 // (one of the two transmissions of the packet or both 01856 // transmissions of the packet). Therefore the sender may 01857 // underestimate Pipe by considering both segments to have left 01858 // the network when it is possible that only one of the two has. 01859 // 01860 // We believe that the triggering of rule (3) will be rare and 01861 // that the implications are likely limited to corner cases 01862 // relative to the entire recovery algorithm. Therefore we leave 01863 // the decision of whether or not to use rule (3) to 01864 // implementors." 01865 if (!found) 01866 { 01867 for (uint32 s3=state->snd_una; s3<state->snd_max; s3=s3+shift) 01868 { 01869 if (rexmitQueue->getSackedBit(s3)==false) 01870 { 01871 if (seqGE(s3,state->highRxt) && 01872 seqLE(s3,(rexmitQueue->getHighestSackedSeqNum()))) 01873 { 01874 seqNum = s3; 01875 found = true; 01876 return seqNum; 01877 } 01878 } 01879 } 01880 } 01881 01882 // RFC 3517, page 6: "(4) If the conditions for each of (1), (2), and (3) are not met, 01883 // then NextSeg () MUST indicate failure, and no segment is 01884 // returned." 01885 if (!found) 01886 seqNum = 0; 01887 01888 return seqNum; 01889 } 01890 01891 void TCPConnection::sendDataDuringLossRecoveryPhase(uint32 congestionWindow) 01892 { 01893 ASSERT (state->sack_enabled && state->lossRecovery); 01894 // RFC 3517 pages 7 and 8: "(5) In order to take advantage of potential additional available 01895 // cwnd, proceed to step (C) below. 01896 // (...) 01897 // (C) If cwnd - pipe >= 1 SMSS the sender SHOULD transmit one or more 01898 // segments as follows: 01899 // (...) 01900 // (C.5) If cwnd - pipe >= 1 SMSS, return to (C.1)" 01901 while (((int)congestionWindow - (int)state->pipe) >= (int)state->snd_mss) // Note: Typecast needed to avoid prohibited transmissions 01902 { 01903 // RFC 3517 pages 7 and 8: "(C.1) The scoreboard MUST be queried via NextSeg () for the 01904 // sequence number range of the next segment to transmit (if any), 01905 // and the given segment sent. If NextSeg () returns failure (no 01906 // data to send) return without sending anything (i.e., terminate 01907 // steps C.1 -- C.5)." 01908 uint32 seqNum = nextSeg(); // if nextSeg() returns 0 (=failure): terminate steps C.1 -- C.5 01909 if (seqNum != 0) 01910 { 01911 sendSegmentDuringLossRecoveryPhase(seqNum); 01912 // RFC 3517 page 8: "(C.4) The estimate of the amount of data outstanding in the 01913 // network must be updated by incrementing pipe by the number of 01914 // octets transmitted in (C.1)." 01915 state->pipe += state->sentBytes; 01916 } 01917 else // nextSeg () returns failure: terminate steps C.1 -- C.5 01918 break; 01919 } 01920 } 01921 01922 void TCPConnection::sendSegmentDuringLossRecoveryPhase(uint32 seqNum) 01923 { 01924 ASSERT (state->sack_enabled && state->lossRecovery); 01925 // start sending from seqNum 01926 state->snd_nxt = seqNum; 01927 01928 uint32 old_highRxt = rexmitQueue->getHighestRexmittedSeqNum(); 01929 01930 // no need to check cwnd and rwnd - has already be done before 01931 // no need to check nagle - sending mss bytes 01932 sendSegment(state->snd_mss); 01933 01934 uint32 sentSeqNum = seqNum + state->sentBytes; 01935 01936 // RFC 3517 page 8: "(C.2) If any of the data octets sent in (C.1) are below HighData, 01937 // HighRxt MUST be set to the highest sequence number of the 01938 // retransmitted segment." 01939 if (seqLE(sentSeqNum, state->snd_max)) // HighData = snd_max 01940 { 01941 ASSERT (sentSeqNum==rexmitQueue->getHighestRexmittedSeqNum()); 01942 state->highRxt = rexmitQueue->getHighestRexmittedSeqNum(); 01943 } 01944 // RFC 3517 page 8: "(C.3) If any of the data octets sent in (C.1) are above HighData, 01945 // HighData must be updated to reflect the transmission of 01946 // previously unsent data." 01947 else if (seqGE(sentSeqNum, state->snd_max)) // HighData = snd_max 01948 state->snd_max = sentSeqNum; 01949 01950 if (unackedVector) 01951 unackedVector->record(state->snd_max - state->snd_una); 01952 01953 // RFC 3517, page 9: "6 Managing the RTO Timer 01954 // 01955 // The standard TCP RTO estimator is defined in [RFC2988]. Due to the 01956 // fact that the SACK algorithm in this document can have an impact on 01957 // the behavior of the estimator, implementers may wish to consider how 01958 // the timer is managed. [RFC2988] calls for the RTO timer to be 01959 // re-armed each time an ACK arrives that advances the cumulative ACK 01960 // point. Because the algorithm presented in this document can keep the 01961 // ACK clock going through a fairly significant loss event, 01962 // (comparatively longer than the algorithm described in [RFC2581]), on 01963 // some networks the loss event could last longer than the RTO. In this 01964 // case the RTO timer would expire prematurely and a segment that need 01965 // not be retransmitted would be resent. 01966 // 01967 // Therefore we give implementers the latitude to use the standard 01968 // [RFC2988] style RTO management or, optionally, a more careful variant 01969 // that re-arms the RTO timer on each retransmission that is sent during 01970 // recovery MAY be used. This provides a more conservative timer than 01971 // specified in [RFC2988], and so may not always be an attractive 01972 // alternative. However, in some cases it may prevent needless 01973 // retransmissions, go-back-N transmission and further reduction of the 01974 // congestion window." 01975 tcpAlgorithm->ackSent(); 01976 if (old_highRxt != state->highRxt) 01977 { 01978 // Note: Restart of REXMIT timer on retransmission is not part of RFC 2581, however optional in RFC 3517 if sent during recovery. 01979 tcpEV << "Retransmission sent during recovery, restarting REXMIT timer.\n"; 01980 tcpAlgorithm->restartRexmitTimer(); 01981 } 01982 else // don't measure RTT for retransmitted packets 01983 tcpAlgorithm->dataSent(seqNum); // seqNum = old_snd_nxt 01984 } 01985 01986 void TCPConnection::sendOneNewSegment(bool fullSegmentsOnly, uint32 congestionWindow) 01987 { 01988 ASSERT (state->limited_transmit_enabled); 01989 // RFC 3042, page 3: 01990 // "When a TCP sender has previously unsent data queued for transmission 01991 // it SHOULD use the Limited Transmit algorithm, which calls for a TCP 01992 // sender to transmit new data upon the arrival of the first two 01993 // consecutive duplicate ACKs when the following conditions are 01994 // satisfied: 01995 // 01996 // * The receiver's advertised window allows the transmission of the 01997 // segment. 01998 // 01999 // * The amount of outstanding data would remain less than or equal 02000 // to the congestion window plus 2 segments. In other words, the 02001 // sender can only send two segments beyond the congestion window 02002 // (cwnd). 02003 // 02004 // The congestion window (cwnd) MUST NOT be changed when these new 02005 // segments are transmitted. Assuming that these new segments and the 02006 // corresponding ACKs are not dropped, this procedure allows the sender 02007 // to infer loss using the standard Fast Retransmit threshold of three 02008 // duplicate ACKs [RFC2581]. This is more robust to reordered packets 02009 // than if an old packet were retransmitted on the first or second 02010 // duplicate ACK. 02011 // 02012 // Note: If the connection is using selective acknowledgments [RFC2018], 02013 // the data sender MUST NOT send new segments in response to duplicate 02014 // ACKs that contain no new SACK information, as a misbehaving receiver 02015 // can generate such ACKs to trigger inappropriate transmission of data 02016 // segments. See [SCWA99] for a discussion of attacks by misbehaving 02017 // receivers." 02018 if (!state->sack_enabled || (state->sack_enabled && state->sackedBytes_old!=state->sackedBytes)) 02019 { 02020 // check how many bytes we have 02021 ulong buffered = sendQueue->getBytesAvailable(state->snd_max); 02022 02023 if (buffered >= state->snd_mss || (!fullSegmentsOnly && buffered > 0)) 02024 { 02025 ulong outstandingData = state->snd_max - state->snd_una; 02026 // check conditions from RFC 3042 02027 if (outstandingData + state->snd_mss <= state->snd_wnd && 02028 outstandingData + state->snd_mss <= congestionWindow + 2*state->snd_mss) 02029 { 02030 uint32 effectiveWin = std::min (state->snd_wnd, congestionWindow) - outstandingData + 2*state->snd_mss; // RFC 3042, page 3: "(...)the sender can only send two segments beyond the congestion window (cwnd)." 02031 // bytes: number of bytes we're allowed to send now 02032 uint32 bytes = std::min(effectiveWin, state->snd_mss); 02033 if (bytes >= state->snd_mss || (!fullSegmentsOnly && bytes > 0)) 02034 { 02035 uint32 old_snd_nxt = state->snd_nxt; 02036 // we'll start sending from snd_max 02037 state->snd_nxt = state->snd_max; 02038 02039 tcpEV << "Limited Transmit algorithm enabled. Sending one new segment.\n"; 02040 sendSegment(bytes); 02041 02042 if (seqGreater(state->snd_nxt, state->snd_max)) 02043 state->snd_max = state->snd_nxt; 02044 02045 if (unackedVector) 02046 unackedVector->record(state->snd_max - state->snd_una); 02047 02048 // reset snd_nxt if needed 02049 if (state->afterRto) 02050 state->snd_nxt = old_snd_nxt + state->sentBytes; 02051 02052 // notify 02053 tcpAlgorithm->ackSent(); 02054 tcpAlgorithm->dataSent(old_snd_nxt); 02055 } 02056 } 02057 } 02058 } 02059 } 02060 02061 uint32 TCPConnection::convertSimtimeToTS(simtime_t simtime) 02062 { 02063 ASSERT (SimTime::getScaleExp() <= -3); // FIXME TODO - If the scale factor is different, we need to adjust our simTime to uint32 casts - we are currently using ms precision 02064 uint32 timestamp = (uint32) (simtime.dbl() * 1000); 02065 return timestamp; 02066 } 02067 02068 simtime_t TCPConnection::convertTSToSimtime(uint32 timestamp) 02069 { 02070 ASSERT (SimTime::getScaleExp() <= -3); // FIXME TODO - If the scale factor is different, we need to adjust our simTime to uint32 casts - we are currently using ms precision 02071 simtime_t simtime = (simtime_t) ((double) timestamp * 0.001); 02072 return simtime; 02073 } 02074 02075 bool TCPConnection::isSendQueueEmpty() 02076 { 02077 return (sendQueue->getBytesAvailable(state->snd_nxt) == 0); 02078 }