NodeJs Clustering: Part2
Implementing Clustering using child_process
In the first part, we discussed how clustering, port sharing, and load balancing work. In this post, we will write a basic clustering implementation using the Node.js child_process module. This will further deepen your understanding of clustering, port sharing, and load balancing.
Round Robin Policy:
In the round-robin approach, the master process accepts the connection and then hands off the connection (client handle) to one of the worker processes. The worker process then handles all communication through a socket created on the handle.
Primary:
const { fork } = require('child_process');
const net = require('net');
const os = require('os');
// Number of CPU entries reported by the OS; one worker is forked for each.
const numCPUs = os.cpus().length;
// Position in `workers` that `distribute` advances for every connection.
let workerIndex = 0;
// Fork workers: every entry is the ChildProcess returned by fork() for the
// worker script, kept in creation order.
const workers = Array.from({ length: numCPUs }, () =>
  fork('worker-round-robin-policy.js'));
// Create the server in the primary process
const server = net.createServer();
// Start listening on the server
server.listen({ port: 8080 });
// Once the server starts listening, get the handle for the server
server.once('listening', () => {
  // `_handle` is an underscore-prefixed (non-public) property of the server.
  // NOTE(review): confirm it is populated this way on the targeted Node.js version.
  const serverHandle = server._handle;
  console.log("Server listening on port 8080, server handle fd:", serverHandle.fd);
  // Start accepting new connections: from here on the handle's connection
  // callback is the one installed by acceptConnection.
  acceptConnection(serverHandle);
});
/**
 * Installs the connection callback on a listening server handle.
 *
 * The callback logs and drops the event when it is invoked with a truthy
 * error; otherwise it logs the new client handle's fd and passes the handle
 * to `distribute`.
 *
 * @param {object} serverHandle - Handle whose `onconnection` property is overwritten.
 */
function acceptConnection(serverHandle) {
  const onIncoming = (err, clientHandle) => {
    if (!err) {
      console.log("New connection received, handle fd:", clientHandle.fd);
      // Distribute the connection to the workers
      distribute(clientHandle);
    } else {
      console.error("Error receiving new connection:", err);
    }
  };

  // The server handle will receive new connections through this callback
  serverHandle.onconnection = onIncoming;
}
/**
 * Hands an accepted client connection to the next worker in round-robin order.
 *
 * The primary's own copy of the handle is closed once the send has completed
 * (successfully or not), so the primary does not accumulate one open
 * descriptor per connection it has passed on.
 *
 * @param {object} clientHandle - Accepted connection handle; must expose `close()`.
 */
function distribute(clientHandle) {
  if (workers.length === 0) {
    // `% 0` would yield NaN and `workers[NaN].send` would throw; release the
    // connection instead.
    console.error("No workers available, closing connection");
    clientHandle.close();
    return;
  }

  workerIndex = (workerIndex + 1) % workers.length;
  const worker = workers[workerIndex];

  // Passing a callback routes send failures here instead of an 'error' event
  // on the ChildProcess, which would be unhandled in this script.
  worker.send("new-conn", clientHandle, (err) => {
    if (err) {
      console.error("Failed to pass connection to worker:", err);
    }
    // The worker now owns the connection (or the hand-off failed); either
    // way the primary has no further use for its copy.
    clientHandle.close();
  });
}
Worker:
const net = require('net');
/**
 * Connection listener: answers the first chunk of data received on the socket
 * with a fixed plain-text HTTP 200 response and then ends the socket.
 *
 * @param {object} socket - Connected socket (event emitter with `writable` and `end`).
 */
const handleConenction = (socket) => {
  console.log("New connection");

  // An 'error' event with no listener is thrown, which would take the whole
  // worker down for a single failing connection (e.g. a client reset).
  socket.on('error', (err) => {
    console.error(`Socket error: ${err.message}`);
  });

  socket.on('close', () => {
    console.log('Client has disconnected');
  });

  // Handle 'data' event to process data from the client
  socket.on('data', (data) => {
    console.log(`Data received: ${data}`);

    // Once the response has been sent the socket is ended, so later chunks
    // are only logged.
    if (!socket.writable) {
      return;
    }
    console.log("writable");

    const responseBody = 'Hello, HTTP World!';
    const response = [
      'HTTP/1.1 200 OK', // Status line
      'Content-Type: text/plain', // Content-Type header
      // Content-Length counts bytes, not UTF-16 code units.
      `Content-Length: ${Buffer.byteLength(responseBody)}`,
      'Connection: close', // Close the connection after response
      '', // End of headers
      responseBody, // Response body
    ].join('\r\n');

    // Write the HTTP response and finish the writable side in one call
    socket.end(response);
  });
};
// Server object whose 'connection' listener is handleConenction. Nothing in
// this worker script calls listen() on it; sockets reach it through the
// 'connection' event emitted by onconnection.
const server = net.createServer(handleConenction);

// Listen for the message from the primary process
process.on('message', (msg, clientHandle) => {
  const isNewConnection = msg === 'new-conn';
  // Ignore unrelated messages and 'new-conn' messages that carry no handle.
  if (!isNewConnection || !clientHandle) {
    return;
  }
  console.log("Received new connection handle in worker");
  onconnection(clientHandle);
});
/**
 * Wraps a connection handle received from the primary in a net.Socket and
 * announces it on `server` as a regular 'connection' event.
 *
 * @param {object} clientHandle - Handle delivered with the 'new-conn' message.
 */
function onconnection(clientHandle) {
  const { allowHalfOpen, pauseOnConnect, highWaterMark } = server;

  // Socket options are taken from the server's own settings.
  const socketOptions = {
    handle: clientHandle,
    allowHalfOpen,
    pauseOnCreate: pauseOnConnect,
    readable: true,
    writable: true,
    readableHighWaterMark: highWaterMark,
    writableHighWaterMark: highWaterMark,
  };
  const socket = new net.Socket(socketOptions);

  // Attach server properties to the socket.
  // NOTE(review): `_server` is an underscore-prefixed, non-public property;
  // confirm it is still honoured by the targeted Node.js version.
  Object.assign(socket, { server, _server: server });

  // Handle connection: runs every 'connection' listener registered on server
  server.emit('connection', socket);
}
// Handle worker shutdown: on SIGINT the only action taken is server.close().
process.on('SIGINT', function onInterrupt() {
  server.close();
});

// Startup banner carrying this worker's process id.
console.log(`Worker ${process.pid} started`);
Direct Connection (Shared Handle):
In this approach, the master process creates the server handle but doesn’t accept connections directly. Instead, it allows each worker process to independently accept and handle incoming connections.
When a new connection is made to the shared port, the operating system decides which worker process will handle the connection. This could be based on availability or other criteria, ensuring that connections are distributed across multiple processes.
Primary:
Worker:
The second approach should, in theory, give the best performance. In practice however, distribution tends to be very unbalanced due to operating system scheduler vagaries. Loads have been observed where over 70% of all connections ended up in just two processes, out of a total of eight. Source: https://nodejs.org/api/cluster.html#how-it-works
The above is a very basic implementation of the two clustering methods using the child_process module, based on my understanding of the cluster module and on debugging it. The actual implementation is more robust and has full-fledged APIs, but the core idea is the same.
I hope you liked reading it. Please feel free to give your constructive feedback. The first part of this post.