Skip to product information
1 of 1

Depth-Controlled Media Crawler with Cordova & Fetch API

Depth-Controlled Media Crawler with Cordova & Fetch API

Regular price $49.95 USD
Regular price Sale price $49.95 USD
Sale Sold out
Quantity

Boost your web crawling capabilities with our enhanced JavaScript crawler designed for Cordova applications. This tool features depth-controlled crawling by queuing each URL with a specified recursion depth, ensuring your crawl doesn't go too deep. With built-in progress tracking and integrated media downloading, you can fetch video and audio files seamlessly using the modern fetch API and Cordova File plugin.

Key Features:

  • Depth-controlled crawling with a customizable maximum depth (default set to 3)
  • Asynchronous, queue-based crawl that fetches one page at a time for orderly page processing
  • Real-time display of pages crawled
  • Media download functionality supporting video and audio extraction
  • Error handling and progress updates using a simple on-page info element

The included code snippet demonstrates the integration of a recursive queue, media downloading using the Cordova File plugin, and dynamic updating of crawl progress. Whether you’re a developer or a marketer looking for robust web crawling functionality, this solution is well-equipped to handle the task.

"use strict";

// Defer all work until the "deviceready" event fires on the document, then
// hand control to onDeviceReady. The trailing `false` registers the listener
// for the bubbling phase (useCapture = false).
 document.addEventListener("deviceready", onDeviceReady, false);

 /**
  * "deviceready" handler: announces readiness in the #status element and
  * launches the crawl.
  *
  * The crawl is rooted at https://example.com, which serves as both the scope
  * (base URL) and the first page, with a depth limit of 3.
  */
 function onDeviceReady() {
   const statusElement = document.getElementById("status");
   statusElement.textContent = "Device Ready – starting crawl…";

   const crawlRoot = "https://example.com";
   const depthLimit = 3;
   startCrawling(crawlRoot, crawlRoot, depthLimit);
 }

 /**
  * Crawl pages breadth-first starting at `startUrl`, following only links that
  * stay inside the scope given by `baseUrl` and that were discovered on pages
  * shallower than `maxDepth`.
  *
  * Pages are fetched one at a time from a FIFO queue. After each page is
  * dequeued the running count is passed to `updateCrawlInfo` (expected to be
  * defined elsewhere in the file), and when the queue is empty the final count
  * is written to the element with id "status".
  *
  * @param {string} baseUrl - Crawl scope. When it parses as an absolute URL, a
  *   link is in scope if its protocol and host match and its path starts with
  *   the base path; otherwise a plain string-prefix comparison is used.
  * @param {string} startUrl - First page to fetch (depth 0).
  * @param {number} maxDepth - Links are only followed from pages whose depth is
  *   strictly less than this value.
  * @returns {void} The crawl runs asynchronously; nothing is returned.
  */
 function startCrawling(baseUrl, startUrl, maxDepth) {
   // Parsed scope; stays null when baseUrl is not an absolute URL, in which
   // case isInScope falls back to the string-prefix comparison.
   let scope = null;
   try {
     scope = new URL(baseUrl);
   } catch (err) {
     scope = null;
   }

   // Turn a raw href into an absolute, fragment-free URL string.
   // Returns null for hrefs that cannot be parsed, so callers can skip them.
   function resolveLink(rawHref, pageUrl) {
     try {
       const resolved = new URL(rawHref, pageUrl);
       // "#section" variants point at the same document; drop the fragment so
       // the page is not crawled once per anchor.
       resolved.hash = "";
       return resolved.href;
     } catch (err) {
       return null;
     }
   }

   // Decide whether an absolute URL belongs to the crawl scope.
   function isInScope(url) {
     if (scope === null) {
       return url.startsWith(baseUrl);
     }
     // Compare parsed components rather than a raw string prefix, so look-alike
     // hosts such as "example.com.evil.test" or "example.com@evil.test" do not
     // slip through.
     const candidate = new URL(url);
     return (
       candidate.protocol === scope.protocol &&
       candidate.host === scope.host &&
       candidate.pathname.startsWith(scope.pathname)
     );
   }

   const normalizedStart = resolveLink(startUrl);
   const firstUrl = normalizedStart === null ? startUrl : normalizedStart;

   const pageQueue = [{ url: firstUrl, depth: 0 }];
   // URLs already queued or crawled. Marking at enqueue time keeps duplicates
   // out of the queue entirely.
   const seenPages = new Set([firstUrl]);
   let pagesCrawledCount = 0;

   // Queue every new, in-scope link found in `doc` one level below `pageDepth`.
   function enqueueLinks(doc, pageUrl, pageDepth) {
     doc.querySelectorAll("a").forEach(function (anchor) {
       // Read the raw attribute: the `href` property of a DOMParser document
       // resolves relative links against the app's own URL, not the crawled page.
       const rawHref = anchor.getAttribute("href");
       if (!rawHref) {
         return;
       }
       const target = resolveLink(rawHref, pageUrl);
       if (target === null || seenPages.has(target) || !isInScope(target)) {
         return;
       }
       seenPages.add(target);
       pageQueue.push({ url: target, depth: pageDepth + 1 });
     });
   }

   // Drain the queue sequentially; a failure on one page is logged and the
   // crawl moves on to the next.
   async function crawlAll() {
     while (pageQueue.length > 0) {
       const pageInfo = pageQueue.shift();
       const currentUrl = pageInfo.url;
       const currentDepth = pageInfo.depth;

       pagesCrawledCount += 1;
       updateCrawlInfo(pagesCrawledCount);

       try {
         const response = await fetch(currentUrl);
         if (!response.ok) {
           // Do not harvest links from HTTP error pages.
           throw new Error("HTTP " + response.status);
         }
         const htmlString = await response.text();
         const doc = new DOMParser().parseFromString(htmlString, "text/html");
         if (currentDepth < maxDepth) {
           // After a redirect, relative links are relative to the final URL.
           enqueueLinks(doc, response.url || currentUrl, currentDepth);
         }
         // Process media downloading ...
       } catch (err) {
         console.error("Error crawling " + currentUrl + ":", err);
       }
     }

     document.getElementById("status").textContent = "Crawl complete. Pages crawled: " + pagesCrawledCount;
   }

   crawlAll().catch(function (err) {
     // Reached only when something outside the per-page try/catch throws
     // (e.g. the progress callback or the status element lookup).
     console.error("Crawl aborted:", err);
   });
 }
View full details