Hi community,
I recently had to solve the situation where I needed to get URLs from hyperlinks stored in rich text formatted long text fields and put them in another long text field. Essentially, just showing the URL, not the hyperlinked text. I ended up adapting the "Convert attachments to URLs" script (posted by Alex Wolfe). I had found a post here, that used a regex expression but this seemed to only work for visible URLs. I never could get it to work for my use case. I'd love feedback on what I could have done differently/better as I'm sure I haven't thought of everything... or even may have just completely missed something obvious.
Thanks!
// Script settings - Click the gear icon for the Scripting app to surface these anytime
// Surfaces options to select the table, view, and fields needed in the script
let config = input.config({
title: '🔗 Extract URLs from hyperlinks',
description: 'Extracts URLs from hyperlinks stored in Long Text fields with rich text formatting enabled.',
items: [
input.config.table('selectedTable', {
label: 'Table',
description: 'Select the table containing the field with hyperlinks',
}),
input.config.field('hyperlinkField', {
label: 'Field with hyperlinks:',
parentTable: 'selectedTable',
description: 'Select the field containing the hyperlinks',
}),
input.config.field('theURLField', {
label: 'Write URL to:',
parentTable: 'selectedTable',
description: 'Select the field to store the extracted URLs',
}),
]
});
// Codes the selections above to variables for the script
let selectedTable = config.selectedTable;
let hyperlinkField = config.hyperlinkField;
let theURLField = config.theURLField;
async function extractURLs() {
if (theURLField.type !== 'multilineText') {
output.text(`"${theURLField.name}" is not a multiline text field. Run the script again with a multiline text field.`);
return;
}
console.log("Script running...");
// Loads the records and field from the selections above
let query = await selectedTable.selectRecordsAsync({
fields: [hyperlinkField],
});
let records = query.records;
// Array for records with URLs extracted from the hyperlink field
let urlsToWrite = [];
// Regular expression to extract URLs from Markdown-style hyperlinks
const urlRegex = /\[.*?\]\((.*?)\)/g;
// Loop through qualified records and extract URLs
for (let i = 0; i < records.length; i++) {
let recID = records[i].id;
let hyperlinkContent = records[i].getCellValue(hyperlinkField);
if (hyperlinkContent !== null && typeof hyperlinkContent === 'string') {
// Extract URLs from the hyperlink field using the regular expression
let match;
let extractedUrls = [];
while ((match = urlRegex.exec(hyperlinkContent)) !== null) {
extractedUrls.push(match[1]);
}
// Store the extracted URLs in the array to be written to the URL field
urlsToWrite.push({
id: recID,
fields: {
[theURLField.name]: extractedUrls.join(', '),
},
});
}
}
// Update records in smaller batches
const batchSize = 50;
const totalBatches = Math.ceil(urlsToWrite.length / batchSize);
let batchCount = 0;
for (let i = 0; i < urlsToWrite.length; i += batchSize) {
const batchUpdate = urlsToWrite.slice(i, i + batchSize);
console.log(`\nProcessing Batch ${batchCount + 1} of ${totalBatches}`);
await showRunningMessage(); // Show running message and wait for a short period
await selectedTable.updateRecordsAsync(batchUpdate);
//console.log(`\nUpdated ${batchUpdate.length} records in Batch ${batchCount + 1}`);
batchCount++;
}
}
// Function to display a simple message and wait for a short period
async function showRunningMessage() {
await pseudoDelay(1000); // Simulate delay using loop for 1000 milliseconds (1 second)
}
// Function to create a delay using async/await and Promise
function pseudoDelay(milliseconds, value) {
return new Promise((resolve) => {
const start = Date.now();
while (Date.now() - start < milliseconds) {}
resolve(value);
});
}
await extractURLs();
// Output message to indicate when the script is done
console.log('Records have been updated with extracted URLs from the hyperlink field.');