Automating Data Import from Webpages to Google Sheets with Apps Script

AppSScript Code

function importDataFromWeb() {
  var url = "www.example.com"; // Replace with the correct URL of the webpage you want to import data from
  var filterValues = [""]; // Specify the values to filter on
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  var response = UrlFetchApp.fetch(url);
  var content = response.getContentText();


  // Extract table data using regular expressions
  var tableRegex = /<table[^>]*>([\s\S]*?)<\/table>/g;
  var rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/g;
  var cellRegex = /<t[hd][^>]*>([\s\S]*?)<\/t[hd]>/g;


  var tables = content.match(tableRegex);


  if (tables) {
    tables.forEach(function(table) {
      var rows = table.match(rowRegex);


      rows.forEach(function(row) {
        var cells = row.match(cellRegex);


        var matchFound = filterValues.every(function(filterValue) {
          return cells.some(function(cell) {
            return cell.includes(filterValue);
          });
        });


        if (matchFound) {
          // Specify the desired column indexes to import (starting from 0)
          var columnIndexes = [0,1,2,3]; // Example: Importing the 1st and 2nd columns


          var values = columnIndexes.map(function(index) {
            var cell = cells[index];
            return cell ? cell.replace(/<\/?[^\>]+>/g, '') : ''; // Remove HTML tags from cell content
          });


          sheet.appendRow(values);
        }
      });
    });
  } else {
    Logger.log("No table found on the webpage.");
  }
}