Nov 09, 2024 04:23 PM
Hey Y'all! My name is Louis Piotti from Philadelphia. I work as the CEO of the nonprofit AfterShock Fam (aftershockfam.org) and the CTO & Co-Founder for Polaris Pathways (polarispathways.com) and I also do custom builds for businesses from large manufacturers, festivals, workshops, and schools.
I just spend hours banging my head against a wall trying to find how to merge records similar to the De-Dupe Extension but would just do it automatically and just built it.
I shared the template for everyone to use along with all the code / prompt history in hopes that less people have to goes through 1000 records 1 by 1 on the De-Dupe extension haha
https://airtable.com/appdymmnNJo098vJG/shrssRCuFQcrla1Rc
// Initialize the table and fields
let table = base.getTable("Contacts");
let query = await table.selectRecordsAsync();
let records = query.records;
// Define specific field names for non-single-line, single-select, checkbox, or attachment fields
const emailField = "Email";
const tagsField = "Tags"; // Multi-select "Tags" field
const companiesField = "Companies"; // Linked record "Companies" field
const notesField = "Notes"; // Long text "Notes" field
const birthdayField = "Birthday"; // Date "Birthday" field
const subscribedField = "Subscribed"; // Checkbox "Subscribed" field
const attachmentsField = "Attachments"; // Attachment field "Attachments"
// Get all single-line text and single-select fields in the table, excluding specified fields
let singleLineTextFields = table.fields.filter(
field => field.type === "singleLineText" && ![emailField, tagsField, companiesField, notesField, birthdayField, subscribedField, attachmentsField].includes(field.name)
);
let singleSelectFields = table.fields.filter(
field => field.type === "singleSelect" && ![emailField, tagsField, companiesField, notesField, birthdayField, subscribedField, attachmentsField].includes(field.name)
);
// Object to track records by email
let emailRecordsMap = {};
// Step 1: Group records by email
for (let record of records) {
let email = record.getCellValue(emailField);
// Skip records without an email
if (!email) continue;
// If email is already in the map, add this record to the duplicate list
if (emailRecordsMap[email]) {
emailRecordsMap[email].push(record);
} else {
emailRecordsMap[email] = [record];
}
}
// Step 2: Process duplicates
for (let email in emailRecordsMap) {
let duplicateRecords = emailRecordsMap[email];
// If there's only one record, skip to next email
if (duplicateRecords.length === 1) continue;
// Choose the main record (first one found) and initialize merge fields
let mainRecord = duplicateRecords[0];
let updatedTags = mainRecord.getCellValue(tagsField) || []; // Initialize with main record's tags
let updatedCompanies = mainRecord.getCellValue(companiesField) || []; // Initialize with main record's companies
let updatedNotes = mainRecord.getCellValue(notesField) || ""; // Initialize with main record's notes
let isSubscribed = mainRecord.getCellValue(subscribedField) || false; // Initialize subscription status
let mergedAttachments = mainRecord.getCellValue(attachmentsField) || []; // Initialize with main record's attachments
// Convert tags, companies, and attachments to sets to ensure unique entries
let uniqueTags = new Set(updatedTags.map(tag => tag.id)); // Track tag IDs
let uniqueCompanies = new Set(updatedCompanies.map(company => company.id)); // Track company IDs
let uniqueAttachments = new Set(mergedAttachments.map(attachment => attachment.id)); // Track attachment IDs
// Set to track unique notes content to avoid duplicates
let uniqueNotes = new Set();
if (updatedNotes) {
uniqueNotes.add(updatedNotes); // Start with the main record's notes
}
// Initialize dictionaries to count occurrences of each value for single-line text, single-select fields, and birthday
let singleLineCounts = {};
for (let field of singleLineTextFields) {
singleLineCounts[field.name] = {};
}
let singleSelectCounts = {};
for (let field of singleSelectFields) {
singleSelectCounts[field.name] = {};
}
let birthdayCounts = {}; // Track occurrences of each birthday
// Iterate through duplicates to count values and merge other data
for (let record of duplicateRecords) {
// Count occurrences for each single-line text field
for (let field of singleLineTextFields) {
let fieldValue = record.getCellValue(field);
if (fieldValue) {
singleLineCounts[field.name][fieldValue] = (singleLineCounts[field.name][fieldValue] || 0) + 1;
}
}
// Count occurrences for each single-select field
for (let field of singleSelectFields) {
let fieldValue = record.getCellValue(field);
if (fieldValue) {
singleSelectCounts[field.name][fieldValue.name] = (singleSelectCounts[field.name][fieldValue.name] || 0) + 1;
}
}
// Count occurrences for "Birthday"
let birthday = record.getCellValue(birthdayField);
if (birthday) {
birthdayCounts[birthday] = (birthdayCounts[birthday] || 0) + 1;
}
// Check "Subscribed" - if any duplicate has it checked, set isSubscribed to true
if (record.getCellValue(subscribedField)) {
isSubscribed = true;
}
// Merge "Tags" - accumulate unique tags from duplicates
let tags = record.getCellValue(tagsField) || [];
for (let tag of tags) {
if (!uniqueTags.has(tag.id)) {
uniqueTags.add(tag.id);
updatedTags.push(tag);
}
}
// Merge "Companies" - accumulate unique companies from duplicates
let companies = record.getCellValue(companiesField) || [];
for (let company of companies) {
if (!uniqueCompanies.has(company.id)) {
uniqueCompanies.add(company.id);
updatedCompanies.push(company);
}
}
// Merge "Attachments" - add unique attachments only
let attachments = record.getCellValue(attachmentsField) || [];
for (let attachment of attachments) {
if (!uniqueAttachments.has(attachment.id)) {
uniqueAttachments.add(attachment.id);
mergedAttachments.push(attachment);
}
}
// Merge "Notes" - add unique notes only
let notes = record.getCellValue(notesField) || "";
if (notes && !uniqueNotes.has(notes)) {
uniqueNotes.add(notes);
updatedNotes += (updatedNotes ? "\n\n" : "") + notes; // Separate with a line break if there's existing text
}
}
// Step 3: Determine the most frequent value for each single-line text field
let singleLineUpdates = {};
for (let field of singleLineTextFields) {
let mostFrequentValue = "";
let maxCount = 0;
for (let value in singleLineCounts[field.name]) {
if (singleLineCounts[field.name][value] > maxCount) {
maxCount = singleLineCounts[field.name][value];
mostFrequentValue = value;
}
}
singleLineUpdates[field.name] = mostFrequentValue;
}
// Step 4: Determine the most frequent value for each single-select field and format it as an object
let singleSelectUpdates = {};
for (let field of singleSelectFields) {
let mostFrequentValue = "";
let maxCount = 0;
for (let value in singleSelectCounts[field.name]) {
if (singleSelectCounts[field.name][value] > maxCount) {
maxCount = singleSelectCounts[field.name][value];
mostFrequentValue = value;
}
}
// Set the most frequent value as an object with the `name` key
singleSelectUpdates[field.name] = mostFrequentValue ? { name: mostFrequentValue } : null;
}
// Step 5: Determine the most frequent birthday
let mostFrequentBirthday = "";
let maxBirthdayCount = 0;
for (let date in birthdayCounts) {
if (birthdayCounts[date] > maxBirthdayCount) {
maxBirthdayCount = birthdayCounts[date];
mostFrequentBirthday = date;
}
}
// Step 6: Update main record with merged data
await table.updateRecordAsync(mainRecord.id, {
...singleLineUpdates, // Update all single-line text fields with most frequent values
...singleSelectUpdates, // Update all single-select fields with most frequent values
[birthdayField]: mostFrequentBirthday, // Update birthday with most frequent date
[subscribedField]: isSubscribed, // Update subscribed status if any duplicate has it checked
[tagsField]: updatedTags, // Update tags field with all unique tags combined
[companiesField]: updatedCompanies, // Update companies field with all unique companies combined
[notesField]: updatedNotes, // Update notes with concatenated unique text
[attachmentsField]: mergedAttachments // Update attachments with all unique files combined
});
// Step 7: Delete duplicate records, keeping the main record
let recordsToDelete = duplicateRecords.slice(1).map(record => record.id);
await table.deleteRecordsAsync(recordsToDelete);
}
output.text("Duplicate records have been merged, with the most frequent values for single-line, single-select, date, checkbox, and attachment fields, along with unique tags, companies, and notes. Redundant records deleted.");