Help

Re: URL builder question: Clean up URL field and append with querystring field

Solved
Jump to Solution
3612 1
cancel
Showing results for 
Search instead for 
Did you mean: 
airballer86
7 - App Architect
7 - App Architect

@Sho answered my first URL audit Find formula question

My followup question/challenge:

I want to clean up (not just identify) the {Link URL} field and join it with {reftag} field:

Add

  • missing https://
  • and/or missing "s" in http://

Remove

  • invalid characters; e.g., ' §
  • the following querystrings:
    • ?ref= 
    • ?ref_=
    • &ref=
    • &ref_=

Then join two fields ({Link URL} & {reftag}) based on logic:

  • If {Link URL} Does NOT contain '?' append {Link URL} with '?ref_=' & {reftag} field
  • If {Link URL} contains '?' append {Link URL} with '&ref_=' & {reftag} field

Here is the original solution used to callout errors in the {Link URL} field:

 

IF(
  {Link URL},
  IF(
    OR(
      FIND("http://", {Link URL}),
      NOT(FIND("https://", {Link URL}))
    ),
    'missing https, '
  ) &
  IF(
    FIND("http://", {Link URL}),
    'missing https, '
  ) &
  IF(
    FIND("ref=", {Link URL}),
    'remove ref=, '
  ) &
  IF(
    FIND("ref_=", {Link URL}),
    'remove ref_=, '
  ) &
  IF(
    FIND("'", {Link URL}),
    'remove apostrophe, '
  ) &
  IF(
    FIND("§", {Link URL}),
    'remove §, '
  )
)

 

 

 

 

1 Solution

Accepted Solutions
Arthur_Tutt
8 - Airtable Astronomer
8 - Airtable Astronomer

@airballer86 tonight's your lucky night - was able to work some magic and further refine the script to work to your examples. Here's the sample base and expected output:

Screenshot 2023-11-05 200418.png

And the script (note: all input variables are the same as before)

var urlTable = base.getTable("URL Markdown")

var inputConfig = input.config();
var recordID = inputConfig.recordID;
var originalURL = inputConfig.originalURL;
var refTag = inputConfig.refTag;

function cleanUrl(linkUrl, reftag) {
    // Add missing 'https://' or 'http://'
    if (!linkUrl.startsWith('http')) {
        linkUrl = 'https://' + linkUrl;
    }

    // Add missing 's' in 'http://'
    linkUrl = linkUrl.replace('http://', 'https://');

    // Remove invalid characters
    linkUrl = linkUrl.replace(/[\'§]/g, '');

    // Split the linkUrl by '&'
    const parts1 = linkUrl.split('?');
    console.log(parts1)

    if (parts1[1] == null) {
        let cleanedUrlComplete = parts1[0].toString()

        if (refTag != null) {
            if (!cleanedUrlComplete.includes('?')) {
                cleanedUrlComplete += `?ref_=${reftag}`;
            } else {
                cleanedUrlComplete += `&ref_=${reftag}`;
            }
        }
        return cleanedUrlComplete;
    }

    if (parts1[1] != null) {
        const parts2 = parts1[1].split('&');
        console.log(parts2)

        // Filter out parameters starting with 'ref_=' or 'ref='
        const cleanedParts1 = [parts1[0]].filter(part => !/^ref(_|=)/.test(part));
        console.log(cleanedParts1)
        const cleanedParts2 = parts2.filter(part => !/^ref(_|=)/.test(part));
        console.log(cleanedParts2)
        let cleanedUrl2 = cleanedParts2.join('&');
        console.log(cleanedUrl2) 

        // Join fields based on logic
        let cleanedUrl1 = cleanedParts1.toString()
        console.log(cleanedUrl1)
        
        let cleanedUrl2Question;

        if (cleanedParts2.length == 0) {
            cleanedUrl2Question = cleanedUrl2
            console.log(cleanedUrl2Question)
        } else {
            cleanedUrl2Question = `?${cleanedUrl2}`
            console.log(cleanedUrl2Question)
        }
        
        let cleanedUrlComplete = cleanedUrl1 + cleanedUrl2Question
        console.log(cleanedUrlComplete)

        if (refTag != null) {
            if (!cleanedUrlComplete.includes('?')) {
                cleanedUrlComplete += `?ref_=${reftag}`;
            } else {
                cleanedUrlComplete += `&ref_=${reftag}`;
            }
        } 
        return cleanedUrlComplete;
    }
}

// Example usage
// const linkUrl = 'example.com/page?param=value&ref_=123&other=456';
// const reftag = '456';

const cleanedUrl = cleanUrl(originalURL, refTag);
console.log(cleanedUrl);

// Output Cleaned URL
var updates = [{
  "id": recordID,
        fields: {
            "Cleaned URL": cleanedUrl
        }
}]
// console.log(updates)

await urlTable.updateRecordsAsync(updates);

 

Note: this script could prob use some clean up / refinement, but test it out, it at least should be working! 

See Solution in Thread

11 Replies 11
Kenneth_Raghuna
8 - Airtable Astronomer
8 - Airtable Astronomer

This would be easier to do with a script. If you use a formula though, here are some functions you'll want to look at to get construct a method for each case:

  • SEARCH()
  • SUBSTITUTE()
  • LEFT()
  • RIGHT()
  • MID()
  • LEN()

I really suggest using a script though. This formula is going to look EXTREMELY messy as you'll have to repeat a few steps multiple times.

Arthur_Tutt
8 - Airtable Astronomer
8 - Airtable Astronomer

Hey @airballer86 Great question. Like @Kenneth_Raghuna said I'd recommend an automation here. I recorded a quick Loom Video to show you step by step how I'd set it up. 

*** Remember to change all my Table / Field / Variable names to match yours or this won't work for you ***

Sample Table Setup And Desired Output:

Screenshot 2023-11-04 171155.png

 

Automation Trigger Setup:

Screenshot 2023-11-04 171237.png

 

Script Input Variables:

Screenshot 2023-11-04 171314.png

 

 Automation Script:

var urlTable = base.getTable("URL Markdown") // change this table name to match yours

var inputConfig = input.config();
var recordID = inputConfig.recordID;
var originalURL = inputConfig.originalURL;
var refTag = inputConfig.refTag;

function cleanUrl(linkUrl, reftag) {
    // Add missing 'https://' or 'http://'
    if (!linkUrl.startsWith('http')) {
        linkUrl = 'https://' + linkUrl;
    }

    // Add missing 's' in 'http://'
    linkUrl = linkUrl.replace('http://', 'https://');

    // Remove invalid characters
    linkUrl = linkUrl.replace(/[\'§]/g, '');

    // Remove everything after '?ref='
    const index1 = linkUrl.indexOf('?ref=');
    if (index1 !== -1) {
        linkUrl = linkUrl.slice(0, index1 + 5);
    }

    // Remove everything after '?ref_='
    const index3 = linkUrl.indexOf('?ref_=');
    if (index3 !== -1) {
        linkUrl = linkUrl.slice(0, index3 + 6);
    }

    // Remove specified query strings
    const queryStringsToRemove = ['?ref=', '?ref_=', '&ref=', '&ref_='];
    for (const qs of queryStringsToRemove) {
        linkUrl = linkUrl.replace(qs, '');
    }

    

    // Join fields based on logic
    if (refTag != null) {
        if (!linkUrl.includes('?')) {
            linkUrl += `?ref_=${reftag}`;
        } else {
            linkUrl += `&ref_=${reftag}`;
        }
    }
    
    return linkUrl;
}

// Example usage
// const linkUrl = 'example.com/page?param=value&ref_=123&other=456';
// const reftag = '456';

const cleanedUrl = cleanUrl(originalURL, refTag);
console.log(cleanedUrl);

// Output Cleaned URL
var updates = [{
  "id": recordID,
        fields: {
            "Cleaned URL": cleanedUrl
        }
}]
console.log(updates)

await urlTable.updateRecordsAsync(updates);

Let me know if this works for you!

@Arthur_Tutt , Thank you for the prompt and amazing response! I am putting this in place now. I left you one follow up question on the Loom video:

  • for the cleanUrl functions
    • can the " // Remove everything after '?ref=' " be modified to remove everything until next ?/&
    • and can you add logic to look for ?ref= and &ref= 

I'm beyond thankful and impressed!

I did run the script as is (updating the fields as indicated) and getting the following errors:

ERROR
TypeError: Cannot read properties of undefined (reading 'startsWith')
    at cleanUrl on line 10
    at main on line 56
Arthur_Tutt
8 - Airtable Astronomer
8 - Airtable Astronomer

Hey @airballer86 loved the video follow up, that's the first one ever received! 😆

Alrighty, so let's do a few examples so I'm clear on what you're looking for:

example.com?apple&ref=square&orange  would become:

https://example.com?apple&orange 

and example.com?ref_=rectangle&ref_=square&apple&ref=triangle&orange would become:

https://example.com?apple&orange 

So essentially you're just trying to eliminate anything directly related to a 'ref' (in it's various formats) but keep the other query parameters (and then add your ref tags if applicable after). Do I have that correct?

If not share a few example cases so I'm clear. Thanks! 

Arthur_Tutt
8 - Airtable Astronomer
8 - Airtable Astronomer

@airballer86 tonight's your lucky night - was able to work some magic and further refine the script to work to your examples. Here's the sample base and expected output:

Screenshot 2023-11-05 200418.png

And the script (note: all input variables are the same as before)

var urlTable = base.getTable("URL Markdown")

var inputConfig = input.config();
var recordID = inputConfig.recordID;
var originalURL = inputConfig.originalURL;
var refTag = inputConfig.refTag;

function cleanUrl(linkUrl, reftag) {
    // Add missing 'https://' or 'http://'
    if (!linkUrl.startsWith('http')) {
        linkUrl = 'https://' + linkUrl;
    }

    // Add missing 's' in 'http://'
    linkUrl = linkUrl.replace('http://', 'https://');

    // Remove invalid characters
    linkUrl = linkUrl.replace(/[\'§]/g, '');

    // Split the linkUrl by '&'
    const parts1 = linkUrl.split('?');
    console.log(parts1)

    if (parts1[1] == null) {
        let cleanedUrlComplete = parts1[0].toString()

        if (refTag != null) {
            if (!cleanedUrlComplete.includes('?')) {
                cleanedUrlComplete += `?ref_=${reftag}`;
            } else {
                cleanedUrlComplete += `&ref_=${reftag}`;
            }
        }
        return cleanedUrlComplete;
    }

    if (parts1[1] != null) {
        const parts2 = parts1[1].split('&');
        console.log(parts2)

        // Filter out parameters starting with 'ref_=' or 'ref='
        const cleanedParts1 = [parts1[0]].filter(part => !/^ref(_|=)/.test(part));
        console.log(cleanedParts1)
        const cleanedParts2 = parts2.filter(part => !/^ref(_|=)/.test(part));
        console.log(cleanedParts2)
        let cleanedUrl2 = cleanedParts2.join('&');
        console.log(cleanedUrl2) 

        // Join fields based on logic
        let cleanedUrl1 = cleanedParts1.toString()
        console.log(cleanedUrl1)
        
        let cleanedUrl2Question;

        if (cleanedParts2.length == 0) {
            cleanedUrl2Question = cleanedUrl2
            console.log(cleanedUrl2Question)
        } else {
            cleanedUrl2Question = `?${cleanedUrl2}`
            console.log(cleanedUrl2Question)
        }
        
        let cleanedUrlComplete = cleanedUrl1 + cleanedUrl2Question
        console.log(cleanedUrlComplete)

        if (refTag != null) {
            if (!cleanedUrlComplete.includes('?')) {
                cleanedUrlComplete += `?ref_=${reftag}`;
            } else {
                cleanedUrlComplete += `&ref_=${reftag}`;
            }
        } 
        return cleanedUrlComplete;
    }
}

// Example usage
// const linkUrl = 'example.com/page?param=value&ref_=123&other=456';
// const reftag = '456';

const cleanedUrl = cleanUrl(originalURL, refTag);
console.log(cleanedUrl);

// Output Cleaned URL
var updates = [{
  "id": recordID,
        fields: {
            "Cleaned URL": cleanedUrl
        }
}]
// console.log(updates)

await urlTable.updateRecordsAsync(updates);

 

Note: this script could prob use some clean up / refinement, but test it out, it at least should be working! 

@Arthur_Tutt I'm not sure if it's user error on my part but I'm getting the following error:

ERROR
TypeError: Cannot read properties of undefined (reading 'startsWith')
    at cleanUrl on line 10
    at main on line 81
Arthur_Tutt
8 - Airtable Astronomer
8 - Airtable Astronomer

@airballer86 can you paste in your entire script? I'm guessing some variable didn't get updated properly converting from my setup to yours. And also take a screenshot of how you setup your input variables (in the left panel)

@Arthur_Tutt here you go:

 

var urlTable = base.getTable("Link Checker")

var inputConfig = input.config();
var recordID = inputConfig.recordID;
var originalURL = inputConfig.originalURL;
var refTag = inputConfig.refTag;

function cleanUrl(linkUrl, reftag) {
    // Add missing 'https://' or 'http://'
    if (!linkUrl.startsWith('http')) {
        linkUrl = 'https://' + linkUrl;
    }

    // Add missing 's' in 'http://'
    linkUrl = linkUrl.replace('http://', 'https://');

    // Remove invalid characters
    linkUrl = linkUrl.replace(/[\'§]/g, '');

    // Split the linkUrl by '&'
    const parts1 = linkUrl.split('?');
    console.log(parts1)

    if (parts1[1] == null) {
        let cleanedUrlComplete = parts1[0].toString()

        if (refTag != null) {
            if (!cleanedUrlComplete.includes('?')) {
                cleanedUrlComplete += `?ref_=${reftag}`;
            } else {
                cleanedUrlComplete += `&ref_=${reftag}`;
            }
        }
        return cleanedUrlComplete;
    }

    if (parts1[1] != null) {
        const parts2 = parts1[1].split('&');
        console.log(parts2)

        // Filter out parameters starting with 'ref_=' or 'ref='
        const cleanedParts1 = [parts1[0]].filter(part => !/^ref(_|=)/.test(part));
        console.log(cleanedParts1)
        const cleanedParts2 = parts2.filter(part => !/^ref(_|=)/.test(part));
        console.log(cleanedParts2)
        let cleanedUrl2 = cleanedParts2.join('&');
        console.log(cleanedUrl2) 

        // Join fields based on logic
        let cleanedUrl1 = cleanedParts1.toString()
        console.log(cleanedUrl1)
        
        let cleanedUrl2Question;

        if (cleanedParts2.length == 0) {
            cleanedUrl2Question = cleanedUrl2
            console.log(cleanedUrl2Question)
        } else {
            cleanedUrl2Question = `?${cleanedUrl2}`
            console.log(cleanedUrl2Question)
        }
        
        let cleanedUrlComplete = cleanedUrl1 + cleanedUrl2Question
        console.log(cleanedUrlComplete)

        if (refTag != null) {
            if (!cleanedUrlComplete.includes('?')) {
                cleanedUrlComplete += `?ref_=${reftag}`;
            } else {
                cleanedUrlComplete += `&ref_=${reftag}`;
            }
        } 
        return cleanedUrlComplete;
    }
}

// Example usage
// const linkUrl = 'example.com/page?param=value&ref_=123&other=456';
// const reftag = '456';

const cleanedUrl = cleanUrl(originalURL, refTag);
console.log(cleanedUrl);

// Output Cleaned URL
var updates = [{
  "id": recordID,
        fields: {
            "Cleaned URL": cleanedUrl
        }
}]
// console.log(updates)

await urlTable.updateRecordsAsync(updates);

 

and a screen shot of the variables