目前,我们处于互联网时代,互联网产品百花齐放。例如,当打开浏览器,可以看到各种信息,浏览器是如何跟服务器进行通信的?当打开微信跟朋友聊天时,你是如何跟朋友进行消息传递的?这些都得靠网络进程之间的通信,都得依赖于 socket。那什么是 socket?node 中有哪些跟网络通信有关的模块?这些问题是本文研究的重点。
Socket 源于 Unix,而 Unix 的基本哲学是『一切皆文件』,都可以用『打开 (open) ==> 读 / 写 (read/write) ==> 关闭 (close)』模式来操作,Socket 也可以采用这种方法进行理解。关于 Socket,可以总结如下几点:
node 中的模块,从两种语言实现角度来说,存在 javascript、c++ 两部分,通过
来建立关系。具体分析如下:
- process.binding
- crypto
net 模块是基于 TCP 协议的 socket 网络编程模块,http 模块就是建立在该模块的基础上实现的,先来看看基本使用方法:
- // 创建socket服务器 server.js
- const net = require('net');
- const server = net.createServer();
- server.on('connection', (socket) => {
- socket.pipe(process.stdout);
- socket.write('data from server');
- });
- server.listen(3000, () => {
- console.log(`server is on ${JSON.stringify(server.address())}`);
- });
- // 创建socket客户端 client.js
- const net = require('net');
- const client = net.connect({
- port: 3000
- });
- client.on('connect', () = >{
- client.write('data from client');
- });
- client.on('data', (chunk) = >{
- console.log(chunk.toString());
- client.end();
- });
- // 打开两个终端,分别执行`node server.js`、`node client.js`,可以看到客户端与服务器进行了数据通信。
使用
创建了 server 对象,那 server 对象有哪些特点:
- const server = net.createServer();
- // net.js
- exports.createServer = function(options, connectionListener) {
- return new Server(options, connectionListener);
- };
- function Server(options, connectionListener) {
- EventEmitter.call(this);
- ...
- if (typeof connectionListener === 'function') {
- this.on('connection', connectionListener);
- }
- ...
- this._handle = null;
- }
- util.inherits(Server, EventEmitter);
上述代码可以分为几个点:
就是一个语法糖,帮助 new 生成 server 对象
- createServer
、
- TCP
类创建
- Pipe
再来看看 connectionListener 事件的回调函数,里面包含一个
对象,该对象是一个连接套接字,由一个五元组 (server_ip、server_port、protocol、client_ip、client_port) 唯一标识,相关实现如下:
- socket
- function onconnection(err, clientHandle) {
- ...
- var socket = new Socket({
- ...
- });
- ...
- self.emit('connection', socket);
- }
因为 Socket 是继承了
,所以 Socket 也是一个可读可写流,可以使用流的方法进行数据的处理。
- stream.Duplex
接下来就是很关键的端口监听 (port),这是 server 与 client 的主要区别,代码:
- Server.prototype.listen = function() {
- ...
- listen(self, ip, port, addressType, backlog, fd, exclusive);
- ...
- }
- function listen(self, address, port, addressType, backlog, fd, exclusive) {
- ...
- if (!cluster) cluster = require('cluster');
- if (cluster.isMaster || exclusive) {
- self._listen2(address, port, addressType, backlog, fd);
- return;
- }
- cluster._getServer(self, {
- ...
- }, cb);
- function cb(err, handle) {
- ...
- self._handle = handle;
- self._listen2(address, port, addressType, backlog, fd);
- ...
- }
- }
- Server.prototype._listen2 = function(address, port, addressType, backlog, fd) {
- if (this._handle) {
- ...
- } else {
- ...
- rval = createServerHandle(address, port, addressType, fd);
- ...
- this._handle = rval;
- }
- this._handle.onconnection = onconnection;
- var err = _listen(this._handle, backlog);
- ...
- }
- function _listen(handle, backlog) {
- return handle.listen(backlog || 511);
- }
上述代码有几个点需要注意:
接下来分析下 listen 中最重要的
了,_handle 决定了 server 的功能:
- _handle
- function createServerHandle(address, port, addressType, fd) {
- ...
- if (typeof fd === 'number' && fd >= 0) {
- ...
- handle = createHandle(fd);
- ...
- } else if(port === -1 && addressType === -1){
- handle = new Pipe();
- } else {
- handle = new TCP();
- }
- ...
- return handle;
- }
- function createHandle(fd) {
- var type = TTYWrap.guessHandleType(fd);
- if (type === 'PIPE') return new Pipe();
- if (type === 'TCP') return new TCP();
- throw new TypeError('Unsupported fd type: ' + type);
- }
由 C++ 中的 Pipe、TCP 实现,因而要想完全搞清楚 node 中的网络通信,必须深入到 node 的 C++ 源码(以及底层的 libuv)里面。
- _handle
跟 net 模块相比,基于 UDP 通信的 dgram 模块就简单了很多:因为不需要通过三次握手建立连接,整个通信过程更加简洁。对于数据准确性要求不太高的业务场景,可以使用该模块完成数据的通信。
- // server端实现
- const dgram = require('dgram');
- const server = dgram.createSocket('udp4');
- server.on('message', (msg, addressInfo) => {
- console.log(addressInfo);
- console.log(msg.toString());
- const data = Buffer.from('from server');
- server.send(data, addressInfo.port);
- });
- server.bind(3000, () => {
- console.log('server is on ', server.address());
- });
- // client端实现
- const dgram = require('dgram');
- const client = dgram.createSocket('udp4');
- const data = Buffer.from('from client');
- client.send(data, 3000);
- client.on('message', (msg, addressInfo) => {
- console.log(addressInfo);
- console.log(msg.toString());
- client.close();
- });
从源码层面分析上述代码的原理实现:
- exports.createSocket = function(type, listener) {
- return new Socket(type, listener);
- };
- function Socket(type, listener) {
- ...
- var handle = newHandle(type);
- this._handle = handle;
- ...
- this.on('message', listener);
- ...
- }
- util.inherits(Socket, EventEmitter);
- const UDP = process.binding('udp_wrap').UDP;
- function newHandle(type) {
- if (type == 'udp4') {
- const handle = new UDP();
- handle.lookup = lookup4;
- return handle;
- }
- if (type == 'udp6') {
- const handle = new UDP();
- handle.lookup = lookup6;
- handle.bind = handle.bind6;
- handle.send = handle.send6;
- return handle;
- }
- ...
- }
- Socket.prototype.bind = function(port_ /*, address, callback*/) {
- ...
- startListening(self);
- ...
- }
- function startListening(socket) {
- socket._handle.onmessage = onMessage;
- socket._handle.recvStart();
- ...
- }
- function onMessage(nread, handle, buf, rinfo) {
- ...
- self.emit('message', buf, rinfo);
- ...
- }
- Socket.prototype.send = function(buffer, offset, length, port, address, callback) {
- ...
- self._handle.lookup(address, function afterDns(ex, ip) {
- doSend(ex, self, ip, list, address, port, callback);
- });
- }
- const SendWrap = process.binding('udp_wrap').SendWrap;
- function doSend(ex, self, ip, list, address, port, callback) {
- ...
- var req = new SendWrap();
- ...
- var err = self._handle.send(req, list, list.length, port, ip, !!callback);
- ...
- }
上述代码存在几个点需要注意:
DNS(Domain Name System) 用于域名解析,也就是找到 host 对应的 ip 地址,在计算机网络中,这个工作是由应用层的 DNS 协议实现的(ARP 协议负责的是把 ip 地址解析成 MAC 地址,并不负责域名解析)。在 node 中存在
模块来完成相应功能,其中 dns 里面的函数分为两类:
- dns
- dns.lookup
- const dns = require('dns');
- const host = 'bj.meituan.com';
- dns.lookup(host, (err, address, family) = >{
- if (err) {
- console.log(err);
- return;
- }
- console.log('by net.lookup, address is: %s, family is: %s', address, family);
- });
- dns.resolve(host, (err, address) = >{
- if (err) {
- console.log(err);
- return;
- }
- console.log('by net.resolve, address is: %s', address);
- })
- // by net.resolve, address is: 103.37.152.41
- // by net.lookup, address is: 103.37.152.41, family is: 4
在这种情况下,二者解析的结果是一样的,但是假如我们修改本地的 /etc/hosts 文件呢?
- // 在/etc/hosts文件中,增加:
- 10.10.10.0 bj.meituan.com
- // 然后再执行上述文件,结果是:
- // by net.resolve, address is: 103.37.152.41
- // by net.lookup, address is: 10.10.10.0, family is: 4
接下来分析下 dns 的内部实现:
- const cares = process.binding('cares_wrap');
- const GetAddrInfoReqWrap = cares.GetAddrInfoReqWrap;
- exports.lookup = function lookup(hostname, options, callback) {
- ...
- callback = makeAsync(callback);
- ...
- var req = new GetAddrInfoReqWrap();
- req.callback = callback;
- var err = cares.getaddrinfo(req, hostname, family, hints);
- ...
- }
- function resolver(bindingName) {
- var binding = cares[bindingName];
- return function query(name, callback) {
- ...
- callback = makeAsync(callback);
- var req = new QueryReqWrap();
- req.callback = callback;
- var err = binding(req, name);
- ...
- return req;
- }
- }
- var resolveMap = Object.create(null);
- exports.resolve4 = resolveMap.A = resolver('queryA');
- exports.resolve6 = resolveMap.AAAA = resolver('queryAaaa');
- ...
- exports.resolve = function(hostname, type_, callback_) {
- ...
- resolver = resolveMap[type_];
- return resolver(hostname, callback);
- ...
- }
上面的源码有几个点需要关注:
在 WEB 开发中,HTTP 作为最流行、最重要的应用层协议,是每个开发人员应该熟知的基础知识,也是我面试的时候必问的一块内容。同时,大多数同学接触 node 时,首先使用的恐怕就是 http 模块。先来一个简单的 demo 看看:
- const http = require('http');
- const server = http.createServer();
- server.on('request', (req, res) = >{
- res.setHeader('foo', 'test');
- res.writeHead(200, {
- 'Content-Type': 'text/html',
- });
- res.write('<!doctype>');
- res.end(`<html></html>`);
- });
- server.listen(3000, () => {
- console.log('server is on ', server.address());
- var req = http.request({ host: '127.0.0.1', port: 3000});
- req.on('response', (res) => {
- res.on('data', (chunk) => console.log('data from server ', chunk.toString()) );
- res.on('end', () => server.close() );
- });
- req.end();
- });
- // 输出结果如下:
- // server is on { address: '::', family: 'IPv6', port: 3000 }
- // data from server <!doctype>
- // data from server <html></html>
针对上述 demo,有很多值得深究的地方,一不注意服务就挂掉了,下面根据 node 的官方文档,逐个进行研究。
因为 HTTP 协议是无状态协议,在不复用连接的情况下,每个请求均需通过三次握手建立连接进行通信,众所周知三次握手、慢启动算法、四次挥手等过程很消耗时间,因此 HTTP/1.1 协议引入了 keep-alive 来避免频繁地建立连接。那么对于 tcp 连接该如何管理呢?http.Agent 就是做这个工作的。先看看源码中的关键部分:
- function Agent(options) {
- ...
- EventEmitter.call(this);
- ...
- self.maxSockets = self.options.maxSockets || Agent.defaultMaxSockets;
- self.maxFreeSockets = self.options.maxFreeSockets || 256;
- ...
- self.requests = {}; // 请求队列
- self.sockets = {}; // 正在使用的tcp连接池
- self.freeSockets = {}; // 空闲的连接池
- self.on('free', function(socket, options) {
- ...
- // requests、sockets、freeSockets的读写操作
- self.requests[name].shift().onSocket(socket);
- freeSockets.push(socket);
- ...
- }
- }
- Agent.defaultMaxSockets = Infinity;
- util.inherits(Agent, EventEmitter);
- // 关于socket的相关增删改查操作
- Agent.prototype.addRequest = function(req, options) {
- ...
- if (freeLen) {
- var socket = this.freeSockets[name].shift();
- ...
- this.sockets[name].push(socket);
- ...
- } else if (sockLen < this.maxSockets) {
- ...
- } else {
- this.requests[name].push(req);
- }
- ...
- }
- Agent.prototype.createSocket = function(req, options, cb) { ... }
- Agent.prototype.removeSocket = function(s, options) { ... }
- exports.globalAgent = new Agent();
上述代码有几个点需要注意:
、
- sockets
的增删查
- freeSockets
下面可以测试下 agent 对请求本身的限制:
- // req.js
- const http = require('http');
- const server = http.createServer();
- server.on('request', (req, res) = >{
- var i = 1;
- setTimeout(() = >{
- res.end('ok ', i++);
- },
- 1000)
- });
- server.listen(3000, () = >{
- var max = 20;
- for (var i = 0; i < max; i++) {
- var req = http.request({
- host: '127.0.0.1',
- port: 3000
- });
- req.on('response', (res) = >{
- res.on('data', (chunk) = >console.log('data from server ', chunk.toString()));
- res.on('end', () = >server.close());
- });
- req.end();
- }
- });
- // 在终端中执行time node ./req.js,结果为:
- // real 0m1.123s
- // user 0m0.102s
- // sys 0m0.024s
- // 在req.js中添加下面代码
- http.globalAgent.maxSockets = 5;
- // 然后同样time node ./req.js,结果为:
- // real 0m4.141s
- // user 0m0.103s
- // sys 0m0.024s
当设置 maxSockets 为某个值时,tcp 的连接就会被限制在某个值,剩余的请求就会进入
队列里面,等有空余的 socket 连接后,再从 requests 队列中出队,发送请求。
- requests
当执行 http.request 时,会生成 ClientRequest 对象,该对象虽然没有直接继承 Stream.Writable,但是继承了 http.OutgoingMessage,而 http.OutgoingMessage 实现了 write、end 方法,因此可以跟 stream.Writable 一样使用。
- var req = http.request({
- host: '127.0.0.1',
- port: 3000,
- method: 'post'
- });
- req.on('response', (res) = >{
- res.on('data', (chunk) = >console.log('data from server ', chunk.toString()));
- res.on('end', () = >server.close());
- });
- // 直接使用pipe,在request请求中添加数据
- fs.createReadStream('./data.json').pipe(req);
接下来,看看 http.ClientRequest 的实现, ClientRequest 继承了 OutgoingMessage:
- const OutgoingMessage = require('_http_outgoing').OutgoingMessage;
- function ClientRequest(options, cb) {
- ...
- OutgoingMessage.call(self);
- ...
- }
- util.inherits(ClientRequest, OutgoingMessage);
http.createServer 其实就是创建了一个 http.Server 对象,关键源码如下:
- exports.createServer = function(requestListener) {
- return new Server(requestListener);
- };
- function Server(requestListener) {
- ...
- net.Server.call(this, { allowHalfOpen: true });
- if (requestListener) {
- this.addListener('request', requestListener);
- }
- ...
- this.addListener('connection', connectionListener);
- this.timeout = 2 * 60 * 1000;
- ...
- }
- util.inherits(Server, net.Server);
- function connectionListener(socket) {
- ...
- socket.on('end', socketOnEnd);
- socket.on('data', socketOnData)
- ...
- }
有几个需要关注的点:
看 nodejs.org 官方是如何介绍 server 端的 response 对象的:
This object is created internally by an HTTP server–not by the user. It is passed as the second parameter to the 'request' event.
The response implements, but does not inherit from, the Writable Stream interface.
跟 http.ClientRequest 很像,继承了 OutgoingMessage,没有继承 Stream.Writable,但是实现了 Stream 的功能,可以跟 Stream.Writable 一样灵活使用:
- function ServerResponse(req) {
- ...
- OutgoingMessage.call(this);
- ...
- }
- util.inherits(ServerResponse, OutgoingMessage);
An IncomingMessage object is created by http.Server or http.ClientRequest and passed as the first argument to the 'request' and 'response' event respectively. It may be used to access response status, headers and data.
http.IncomingMessage 在两个地方被内部创建,一个是作为 server 端的 request,另外一个是作为 client 请求中的 response,同时该类显式地继承了 Stream.Readable。
- function IncomingMessage(socket) {
- Stream.Readable.call(this);
- this.socket = socket;
- this.connection = socket;
- ...
- }
- util.inherits(IncomingMessage, Stream.Readable);
上面是对 node 中主要的网络通信模块,粗略进行了分析研究,对网络通信的细节有大概的了解。但是这还远远不够,仍然无法解决 node 应用中出现的各种网络问题,这篇文章只是一个开端,希望后面可以深入了解各个细节、深入到 c++ 层面。
来源: