Build a sitemap extractor using JavaScript
About Sitemap:
A sitemap is a file where you provide information about the pages, videos, and other files on your site, and the relationships between them. Search engines like Google read this file to crawl your site more efficiently.
This is a good example of asynchronous programming in JavaScript: we will use Promises to download the sitemap and process it once it arrives.
<!DOCTYPE html>
<html>
  <head>
    <link
      href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css"
      rel="stylesheet"
      integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC"
      crossorigin="anonymous"
    />
  </head>
  <body>
    <section class="content">
      <div class="container-fluid">
        <!-- One row per sitemap URL is appended to this tbody by the script below. -->
        <table>
          <tbody></tbody>
        </table>
      </div>
    </section>
    <script src='https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js' type='text/javascript'></script>
    <script type='text/javascript'>
      // Downloads a sitemap, extracts every page URL from it and lists the URLs
      // in the table above. Everything runs inside the jQuery ready callback so
      // the <tbody> is guaranteed to exist before rows are appended.
      $(document).ready(function () {
        "use strict";

        const SITEMAP_URL = "https://www.brightenminds.com/sitemap.xml";

        // All URLs extracted so far, kept for later use/inspection.
        const siteList = [];

        /**
         * Parse sitemap XML and return the URL of every entry in its <urlset>.
         *
         * @param {string} xmlText - Raw text of the sitemap document.
         * @returns {string[]} Trimmed, non-empty <loc> values, in document order.
         * @throws {Error} If the text is not well-formed XML or has no <urlset>.
         */
        function extractSitemapUrls(xmlText) {
          const xmlDoc = new DOMParser().parseFromString(xmlText, "text/xml");

          // DOMParser does not throw on malformed XML; it returns a document
          // that contains a <parsererror> element instead.
          if (xmlDoc.getElementsByTagName("parsererror").length > 0) {
            throw new Error("Sitemap is not well-formed XML");
          }

          const urlset = xmlDoc.getElementsByTagName("urlset")[0];
          if (!urlset) {
            throw new Error("No <urlset> element found in sitemap");
          }

          const urls = [];
          // `children` holds element nodes only. `childNodes` would also yield
          // the whitespace text nodes between entries, which have no
          // getElementsByTagName method.
          for (const entry of Array.from(urlset.children)) {
            const loc = entry.getElementsByTagName("loc")[0];
            const url = loc ? loc.textContent.trim() : "";
            if (url !== "") {
              urls.push(url);
            }
          }
          return urls;
        }

        /**
         * Append one table row per URL to the results table.
         *
         * @param {string[]} urls - URLs to display.
         */
        function renderUrls(urls) {
          const $tbody = $(".container-fluid>table>tbody");
          for (const url of urls) {
            // .text() escapes the value, so markup inside the remote sitemap
            // is displayed literally rather than injected into the page.
            $tbody.append($("<tr>").append($("<td>").text(url)));
          }
        }

        fetch(SITEMAP_URL)
          .then((response) => {
            // fetch() only rejects on network failure; HTTP error statuses
            // must be checked explicitly.
            if (!response.ok) {
              throw new Error(`HTTP ${response.status} ${response.statusText}`);
            }
            return response.text();
          })
          .then((xmlText) => {
            const urls = extractSitemapUrls(xmlText);
            siteList.push(...urls);
            renderUrls(urls);
          })
          .catch((error) => {
            console.error(`Failed to fetch: ${error}`);
          });

        // Logged immediately: fetch() is asynchronous, so this line runs
        // before any of the .then() callbacks above.
        console.log("Started request…");
      });
    </script>
  </body>
</html>
Output: