Jul 28, 2023 10:45 AM
Hi community,
I recently had to solve the situation where I needed to get URLs from hyperlinks stored in rich text formatted long text fields and put them in another long text field. Essentially, just showing the URL, not the hyperlinked text. I ended up adapting the "Convert attachments to URLs" script (posted by Alex Wolfe). I had found a post here, that used a regex expression but this seemed to only work for visible URLs. I never could get it to work for my use case. I'd love feedback on what I could have done differently/better as I'm sure I haven't thought of everything... or even may have just completely missed something obvious.
Thanks!
// Script settings - Click the gear icon for the Scripting app to surface these anytime
// Surfaces options to select the table, view, and fields needed in the script
let config = input.config({
title: '🔗 Extract URLs from hyperlinks',
description: 'Extracts URLs from hyperlinks stored in Long Text fields with rich text formatting enabled.',
items: [
input.config.table('selectedTable', {
label: 'Table',
description: 'Select the table containing the field with hyperlinks',
}),
input.config.field('hyperlinkField', {
label: 'Field with hyperlinks:',
parentTable: 'selectedTable',
description: 'Select the field containing the hyperlinks',
}),
input.config.field('theURLField', {
label: 'Write URL to:',
parentTable: 'selectedTable',
description: 'Select the field to store the extracted URLs',
}),
]
});
// Codes the selections above to variables for the script
let selectedTable = config.selectedTable;
let hyperlinkField = config.hyperlinkField;
let theURLField = config.theURLField;
async function extractURLs() {
if (theURLField.type !== 'multilineText') {
output.text(`"${theURLField.name}" is not a multiline text field. Run the script again with a multiline text field.`);
return;
}
console.log("Script running...");
// Loads the records and field from the selections above
let query = await selectedTable.selectRecordsAsync({
fields: [hyperlinkField],
});
let records = query.records;
// Array for records with URLs extracted from the hyperlink field
let urlsToWrite = [];
// Regular expression to extract URLs from Markdown-style hyperlinks
const urlRegex = /\[.*?\]\((.*?)\)/g;
// Loop through qualified records and extract URLs
for (let i = 0; i < records.length; i++) {
let recID = records[i].id;
let hyperlinkContent = records[i].getCellValue(hyperlinkField);
if (hyperlinkContent !== null && typeof hyperlinkContent === 'string') {
// Extract URLs from the hyperlink field using the regular expression
let match;
let extractedUrls = [];
while ((match = urlRegex.exec(hyperlinkContent)) !== null) {
extractedUrls.push(match[1]);
}
// Store the extracted URLs in the array to be written to the URL field
urlsToWrite.push({
id: recID,
fields: {
[theURLField.name]: extractedUrls.join(', '),
},
});
}
}
// Update records in smaller batches
const batchSize = 50;
const totalBatches = Math.ceil(urlsToWrite.length / batchSize);
let batchCount = 0;
for (let i = 0; i < urlsToWrite.length; i += batchSize) {
const batchUpdate = urlsToWrite.slice(i, i + batchSize);
console.log(`\nProcessing Batch ${batchCount + 1} of ${totalBatches}`);
await showRunningMessage(); // Show running message and wait for a short period
await selectedTable.updateRecordsAsync(batchUpdate);
//console.log(`\nUpdated ${batchUpdate.length} records in Batch ${batchCount + 1}`);
batchCount++;
}
}
// Function to display a simple message and wait for a short period
async function showRunningMessage() {
await pseudoDelay(1000); // Simulate delay using loop for 1000 milliseconds (1 second)
}
// Function to create a delay using async/await and Promise
function pseudoDelay(milliseconds, value) {
return new Promise((resolve) => {
const start = Date.now();
while (Date.now() - start < milliseconds) {}
resolve(value);
});
}
await extractURLs();
// Output message to indicate when the script is done
console.log('Records have been updated with extracted URLs from the hyperlink field.');
Solved! Go to Solution.
Jul 31, 2023 08:53 PM - edited Jul 31, 2023 09:00 PM
Kuovonne is correct that there are two types of links in the rich format for long text fields.
The scripting you have shown supports one type.
If you replace the following part of the scripting with this one, it should support both types of links.
// Regular expression to extract URLs from Markdown-style hyperlinks
const urlRegex = /\[.*?\]\((.*?)\)|<(.*?)>/g;
// Loop through qualified records and extract URLs
for (let i = 0; i < records.length; i++) {
let recID = records[i].id;
let hyperlinkContent = records[i].getCellValue(hyperlinkField);
if (hyperlinkContent !== null && typeof hyperlinkContent === 'string') {
// Extract URLs from the hyperlink field using the regular expression
let match;
let extractedUrls = [];
while ((match = urlRegex.exec(hyperlinkContent)) !== null) {
if(match[1])extractedUrls.push(match[1]);
if(match[2])extractedUrls.push(match[2]);
}
Jul 28, 2023 08:10 PM
If you can show me a few different examples, I might be able to come up with a corresponding formula
(Text containing the link and text into which the link is inserted)
Is the long text field into which you are inserting the link a rich text format?
Jul 30, 2023 01:00 PM
Examples of the rich text are important to see what is going wrong. I suggest you find a record where the urls are not being extracted as expected, write a script that outputs the markdown of that rich text field, and then put that markdown into a regex tester to see what is going on.
For example, rich text fields support two different types of markdown links. You only test for a single type of markdown link, not the other. Or maybe you are looking for links that are not actual markdown links but plain urls? Look for patterns in the urls you are finding versus the ones you aren't.
By the way, several things about this script seem unnecessarily complicated.
Jul 31, 2023 05:31 AM - edited Jul 31, 2023 05:31 AM
First off, I'd like to clarify that the script did work for my use case. I may have put this thread into the wrong category (maybe it should be Show and Tell) but I thought feedback on how the script could be improved would also be nice.
Hi @Sho, the field is a rich text formatted field that has text (a google document title) with the text being hyperlinked to the corresponding google doc.
Hi @kuovonne I understand the need to see examples and as a said above, the field is a rich text formatted field that has text (a google document title) with the text being hyperlinked to the corresponding google doc. I'm curious how you would test for the other markdown type. Also, it did get complicated as a result of me trying to work through iterations of troubleshooting. I kept having the script terminate at various places and I put in outputs to see where it was when that happened. I'm also curious what could have made it more simple, hence the reason for wanting to see how the script could be improved. Since I couldn't find a direct way to get the hyperlink from text on the community forum, I wanted to post this so someone else who may need to do that could have a reference.
Jul 31, 2023 08:53 PM - edited Jul 31, 2023 09:00 PM
Kuovonne is correct that there are two types of links in the rich format for long text fields.
The scripting you have shown supports one type.
If you replace the following part of the scripting with this one, it should support both types of links.
// Regular expression to extract URLs from Markdown-style hyperlinks
const urlRegex = /\[.*?\]\((.*?)\)|<(.*?)>/g;
// Loop through qualified records and extract URLs
for (let i = 0; i < records.length; i++) {
let recID = records[i].id;
let hyperlinkContent = records[i].getCellValue(hyperlinkField);
if (hyperlinkContent !== null && typeof hyperlinkContent === 'string') {
// Extract URLs from the hyperlink field using the regular expression
let match;
let extractedUrls = [];
while ((match = urlRegex.exec(hyperlinkContent)) !== null) {
if(match[1])extractedUrls.push(match[1]);
if(match[2])extractedUrls.push(match[2]);
}
Aug 02, 2023 07:31 AM
Thanks Sho. I appreciate the knowledge for sure.