How to export large amounts of data in chunks from MongoDB

Khan Usama
Jan 12, 2023

Exporting a large amount of data from a MongoDB collection can be a resource-intensive task, especially if the data set is huge and the server has limited resources. One way to avoid overwhelming the server is to export the data in smaller chunks rather than all at once.

Here is an example of how you might export a large amount of data in chunks using Node.js and the MongoDB Node.js driver:

// src/exportData.js
const MongoClient = require('mongodb').MongoClient;
const fs = require('fs');

// uploadFile is defined in src/upload-s3.js (shown further below)
const { uploadFile } = require('./upload-s3');

// Connect to the MongoDB server
MongoClient.connect('mongodb://localhost:27017', { useNewUrlParser: true }, (err, client) => {
  if (err) throw err;

  // Select the database and collection
  const db = client.db('yourDatabaseName');
  const collection = db.collection('yourCollectionName');

  // Define the chunk size and initialize the skip value
  const chunkSize = 500;
  let skip = 0;

  // Define a function to export the data in chunks
  const exportData = () => {
    collection.find({}).skip(skip).limit(chunkSize).toArray((err, data) => {
      if (err) throw err;

      // Append the chunk to a CSV file: one document per line, values joined by commas
      if (data.length > 0) {
        fs.appendFileSync('./public/dump/data.csv', data.map(d => Object.values(d).join(',')).join('\n') + '\n');
      }

      // Update the skip value and check if there is more data to export
      skip += chunkSize;
      if (data.length === chunkSize) {
        exportData();
      } else {
        // Close the MongoDB connection
        client.close();

        // Upload the finished CSV to S3 under the key "data_dump.csv"
        uploadFile('data_dump.csv', './public/dump/data.csv', (err, data) => {
          if (!err) console.log(data);
        });
      }
    });
  };

  // Call the export function
  exportData();
});

This code exports the data in chunks of 500 records at a time and appends each chunk to a CSV file named “data.csv”. Because only one chunk is held in memory at a time, the server is not overwhelmed by loading the whole collection at once and avoids getting stuck.
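One caveat of this pattern is that skip() makes MongoDB walk past every previously exported document on each query, so chunks get progressively slower on very large collections. If that becomes a problem, a similar result can be achieved by streaming a single cursor. The snippet below is only a rough sketch, not part of the original example, and reuses the same placeholder database, collection, and file paths:

// src/exportDataStream.js (sketch only, assumes the same placeholders as above)
const MongoClient = require('mongodb').MongoClient;
const fs = require('fs');

MongoClient.connect('mongodb://localhost:27017', { useNewUrlParser: true }, (err, client) => {
  if (err) throw err;

  const collection = client.db('yourDatabaseName').collection('yourCollectionName');
  const out = fs.createWriteStream('./public/dump/data.csv');

  // The driver fetches 500 documents per round trip; no repeated skip() scans
  const stream = collection.find({}).batchSize(500).stream();

  stream.on('data', doc => out.write(Object.values(doc).join(',') + '\n'));
  stream.on('error', err => { throw err; });
  stream.on('end', () => {
    out.end();
    client.close();
  });
});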

Also, you can upload that CSV file to an AWS S3 bucket using a read stream.

// src/upload-s3.js
// create this file in the same directory
const AWS = require('aws-sdk');
const fs = require('fs');

let uploadFile = function (filename, path, cb) {
  // Stream the file from disk instead of loading it into memory
  let stream = fs.createReadStream(path);
  var s3 = new AWS.S3({
    accessKeyId: 'YOUR_ACCESS_KEY_ID',
    secretAccessKey: 'YOUR_SECRET_ACCESS_KEY',
    region: 'YOUR_REGION'
  });
  const params = {
    Key: filename,
    Body: stream,
    ACL: 'private',
    Bucket: 'YOUR_BUCKET_NAME',
  };
  s3.upload(params, (err, data) => {
    if (err) {
      console.error('Error while uploading file: ', err.message);
      cb(err, null);
    } else {
      console.info('File uploaded successfully: ', JSON.stringify(data));
      // Delete the file from the server after a successful upload
      fs.unlink(path, (error) => {
        if (error) {
          console.error('Error occurred while deleting file => ', error);
          cb(error, null);
        } else {
          console.info('File deleted successfully');
          cb(null, data);
        }
      });
    }
  });
};

module.exports = {
  uploadFile
};

When the file is uploaded successfully, you can inform the client via email, socket.io events, or push notifications.
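For example, here is a minimal sketch of a socket.io notification, assuming your app already has a socket.io server instance (io) and using a hypothetical event name export:ready; it could be called from exportData.js in place of the console.log once the upload succeeds.

// src/notify.js (sketch only; `io` and the event name are assumptions, not from the original example)
const notifyExportReady = (io, s3Data) => {
  io.emit('export:ready', {
    key: s3Data.Key,          // S3 object key, e.g. 'data_dump.csv'
    location: s3Data.Location // URL returned by s3.upload
  });
};

module.exports = { notifyExportReady };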

Thanks
