Skip to main content

The deduplication extension isn't efficient due to the large number of duplicates we have—thousands in total. Therefore, I’ve created a script to merge and remove duplicate records based on identifying fields.

The script runs successfully when I don't merge linked record data, but this approach results in valuable information being lost. When I include the merging logic, the script works on smaller datasets (Although, take some time), but it crashes when processing larger datasets.

This is the error I get:

 

This is my script:

 

// Hard-coded configuration let tableName = 'Contacts copy 3'; // Replace with your table name let idFieldName = 'Contact'; // Replace with your identifying field name let emailFieldName = 'Email Address 1'; // Field name for Email Address 1 let mobileFieldName = 'Mobile'; // Field name for Mobile let viewName = 'test 2'; // Replace with the name of the view you want to use // Retrieve the table let table = base.getTable(tableName); // Airtable limits batch operations to 50 records or fewer. let maxRecordsPerCall = 50; // Function to normalize text function normalizeText(text) { if (!text) return ''; return text .toLowerCase() // Convert to lowercase .normalize('NFKD') // Normalize accents .replace(/[\u0300-\u036f]/g, '') // Remove accents .replace(/[^\w\s]/g, '') // Remove punctuation .replace(/\s+/g, ' ') // Normalize whitespace .trim(); // Trim leading/trailing whitespace } // Function to create a key for identifying records function createKey(record) { let idValue = normalizeText(record.getCellValue(idFieldName)); let emailValue = normalizeText(record.getCellValue(emailFieldName)); let mobileValue = normalizeText(record.getCellValue(mobileFieldName)); return JSON.stringify([idValue, emailValue, mobileValue]); } let existing = Object.create(null); let recordsToUpdate = []; let recordsToDelete = []; // Part 1: Identify records with matching contacts let query = await table.selectRecordsAsync({ fields: [idFieldName, emailFieldName, mobileFieldName], view: viewName // Query records from the selected view }); for (let record of query.records) { let key = createKey(record); if (existing[key]) { existing[key].push(record); } else { existing[key] = [record]; } } // Process records based on the conditions for (let records of Object.values(existing)) { if (records.length > 1) { // Sort records by created time or other criteria records.sort((a, b) => new Date(b.createdTime) - new Date(a.createdTime)); let masterRecord = records[0]; let contact = normalizeText(masterRecord.getCellValue(idFieldName)); let emailSet = new Set(); let mobileSet = new Set(); for (let record of records) { let email = normalizeText(record.getCellValue(emailFieldName)); let mobile = normalizeText(record.getCellValue(mobileFieldName)); emailSet.add(email); mobileSet.add(mobile); // Prepare records for deletion if (record.id !== masterRecord.id) { recordsToDelete.push(record.id); } } // Update the master record with merged fields let mergedFields = { [emailFieldName]: [...emailSet].join(', '), [mobileFieldName]: [...mobileSet].join(', ') }; if (Object.keys(mergedFields).length > 0) { recordsToUpdate.push({ id: masterRecord.id, fields: mergedFields }); } } } // Apply updates to merge fields console.log(`Updating ${recordsToUpdate.length} records...`); while (recordsToUpdate.length > 0) { let batch = recordsToUpdate.slice(0, maxRecordsPerCall); await table.updateRecordsAsync(batch); recordsToUpdate = recordsToUpdate.slice(maxRecordsPerCall); } // Apply deletions console.log(`Deleting ${recordsToDelete.length} records...`); while (recordsToDelete.length > 0) { let batch = recordsToDelete.slice(0, maxRecordsPerCall); await table.deleteRecordsAsync(batch); recordsToDelete = recordsToDelete.slice(maxRecordsPerCall); } console.log('Done');

 

 Any suggestions?

Be the first to reply!