Google Sheets + OpenAI: crawl sites, audit faster
You open a site audit and immediately drown in tabs. Home, services, blog categories, random landing pages, then another tab for headings, another for links, another for notes you swear you’ll organize later.
This is where site crawl automation helps. SEO analysts usually feel the pain first, but content leads and agency account managers get stuck in the same loop: find page, copy headings, count links, summarize, repeat.
This workflow crawls a site, stores the useful bits in Google Sheets, then lets you ask questions like a chatbot using that saved data. You’ll see what it does, what you need, and where teams typically save a few hours per audit.
How This Automation Works
See how this solves the problem:
n8n Workflow Template: Google Sheets + OpenAI: crawl sites, audit faster
flowchart LR
subgraph sg0["Chat web Flow"]
direction LR
n0@{ icon: "mdi:robot", form: "rounded", label: "AI Agent", pos: "b", h: 48 }
n1@{ icon: "mdi:brain", form: "rounded", label: "OpenAI Chat Model", pos: "b", h: 48 }
n2@{ icon: "mdi:memory", form: "rounded", label: "Simple Memory", pos: "b", h: 48 }
n3@{ icon: "mdi:database", form: "rounded", label: "Get row(s) in sheet in Googl..", pos: "b", h: 48 }
n4@{ icon: "mdi:swap-horizontal", form: "rounded", label: "If", pos: "b", h: 48 }
n5["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/httprequest.dark.svg' width='40' height='40' /></div><br/>Maping Sitemap"]
n6@{ icon: "mdi:cog", form: "rounded", label: "XML1", pos: "b", h: 48 }
n7["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>UA Rotativo1"]
n8@{ icon: "mdi:location-exit", form: "rounded", label: "Req Error", pos: "b", h: 48 }
n9@{ icon: "mdi:location-exit", form: "rounded", label: "Sitemap Error", pos: "b", h: 48 }
n10["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/httprequest.dark.svg' width='40' height='40' /></div><br/>Req robots"]
n11["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>extract sitemap url"]
n12@{ icon: "mdi:swap-vertical", form: "rounded", label: "OPTIONS", pos: "b", h: 48 }
n13@{ icon: "mdi:robot", form: "rounded", label: "AI Agent1", pos: "b", h: 48 }
n14@{ icon: "mdi:brain", form: "rounded", label: "OpenAI Chat Model1", pos: "b", h: 48 }
n15@{ icon: "mdi:play-circle", form: "rounded", label: "Chat web", pos: "b", h: 48 }
n16@{ icon: "mdi:robot", form: "rounded", label: "Structured Output Parser", pos: "b", h: 48 }
n17@{ icon: "mdi:swap-horizontal", form: "rounded", label: "If1", pos: "b", h: 48 }
n18@{ icon: "mdi:robot", form: "rounded", label: "Respond to Chat", pos: "b", h: 48 }
n19@{ icon: "mdi:robot", form: "rounded", label: "Respond to Chat1", pos: "b", h: 48 }
n20@{ icon: "mdi:robot", form: "rounded", label: "Message a model", pos: "b", h: 48 }
n21@{ icon: "mdi:cog", form: "rounded", label: "XML", pos: "b", h: 48 }
n22@{ icon: "mdi:swap-vertical", form: "rounded", label: "Loop Over Items", pos: "b", h: 48 }
n23@{ icon: "mdi:robot", form: "rounded", label: "Message a model1", pos: "b", h: 48 }
n24@{ icon: "mdi:database", form: "rounded", label: "Append row in sheet in Googl..", pos: "b", h: 48 }
n25@{ icon: "mdi:database", form: "rounded", label: "Complete", pos: "b", h: 48 }
n26@{ icon: "mdi:web", form: "rounded", label: "HTTP Request2", pos: "b", h: 48 }
n27["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>Merge"]
n28@{ icon: "mdi:swap-vertical", form: "rounded", label: "Split URLs", pos: "b", h: 48 }
n29["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/httprequest.dark.svg' width='40' height='40' /></div><br/>Req URL"]
n30["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/markdown.dark.svg' width='40' height='40' /></div><br/>HTML to Markdown"]
n31["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/httprequest.dark.svg' width='40' height='40' /></div><br/>Maping Sitemaps"]
n32@{ icon: "mdi:database", form: "rounded", label: "Get data schema", pos: "b", h: 48 }
n4 --> n0
n4 --> n13
n17 --> n12
n17 --> n18
n21 --> n27
n6 --> n20
n27 --> n28
n12 --> n7
n29 --> n30
n0 --> n19
n15 --> n32
n13 --> n17
n10 --> n11
n10 --> n8
n28 --> n22
n7 --> n10
n26 -.-> n0
n2 -.-> n0
n5 --> n6
n5 --> n9
n32 --> n4
n22 --> n25
n22 --> n29
n31 --> n21
n20 --> n31
n30 --> n23
n23 --> n22
n1 -.-> n0
n14 -.-> n13
n11 --> n5
n16 -.-> n13
n24 -.-> n23
n3 -.-> n0
end
%% Styling
classDef trigger fill:#e8f5e9,stroke:#388e3c,stroke-width:2px
classDef ai fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
classDef aiModel fill:#e8eaf6,stroke:#3f51b5,stroke-width:2px
classDef decision fill:#fff8e1,stroke:#f9a825,stroke-width:2px
classDef database fill:#fce4ec,stroke:#c2185b,stroke-width:2px
classDef api fill:#fff3e0,stroke:#e65100,stroke-width:2px
classDef code fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef disabled stroke-dasharray: 5 5,opacity: 0.5
class n15 trigger
class n0,n13,n16,n18,n19,n20,n23 ai
class n1,n14 aiModel
class n2 ai
class n4,n17 decision
class n3,n24,n25,n32 database
class n5,n10,n26,n29,n31 api
class n7,n11,n27 code
classDef customIcon fill:none,stroke:none
class n5,n7,n10,n11,n27,n29,n30,n31 customIcon
The Challenge: Website audits turn into tab chaos
A “quick” website audit rarely stays quick. You start with one question (“what’s on the services pages?”), then you’re manually checking headings, scanning navigation paths, and pulling links into a doc that nobody ever reuses. The mental load is sneaky too. You’re constantly switching context, which makes you miss obvious issues like duplicate H1s, thin pages hiding in a sitemap, or outdated CTAs. And if you’re doing this for multiple client sites, it becomes the same repetitive grind every week.
It adds up fast. Here’s where it breaks down in real life.
- You end up copying headings and links by hand, and the moment you paste them into a doc they go stale.
- Sitemaps are inconsistent across sites, so you waste time hunting for the “right” URLs to review.
- Audits get stuck at “observations” because summarizing every important page takes forever.
- When someone asks a follow-up question a week later, you have to redo the crawl because your notes aren’t structured.
The Fix: Crawl once, store in Sheets, then ask questions
This workflow starts as a web consultation chatbot, but the real win is what it remembers. The first time you give it a website URL, it validates the URL with AI, finds the sitemap through robots.txt, and chooses the most relevant sitemap (pages, posts, categories, tags) based on your scan options. Then it crawls the selected URLs in batches, downloads each page’s HTML, converts it to Markdown, and uses OpenAI to extract the language, heading hierarchy, internal links, external links, and a clean summary. Each page becomes a structured row in Google Sheets, so you can filter, sort, and reuse it later. Once indexing is done, the sheet gets flagged as ready, which flips the workflow into “agent mode” for future questions.
After that first crawl, you don’t re-audit from scratch. You ask questions in chat, and the AI agent reads your Google Sheets “memory” to answer quickly. If something needs fresh info, it can also make real-time HTTP requests so you’re not stuck trusting old snapshots.
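To make the stored structure concrete, here is a rough sketch of the kind of row each page becomes. In the actual workflow, OpenAI extracts these fields from the page's Markdown; this deterministic, regex-based stand-in (`pageToRow` is a hypothetical helper, not a node in the template) only illustrates the row shape:

```javascript
// Illustrative only: the real workflow has OpenAI extract these fields.
// This regex-based approximation just shows the shape of one sheet row.
function pageToRow(url, html) {
  const headings = [...html.matchAll(/<(h[1-6])[^>]*>(.*?)<\/\1>/gi)]
    .map(m => `${m[1].toUpperCase()}: ${m[2].replace(/<[^>]+>/g, '').trim()}`);
  const links = [...html.matchAll(/<a\s[^>]*href="([^"]+)"/gi)].map(m => m[1]);
  const origin = new URL(url).origin;
  const isInternal = l => l.startsWith('/') || l.startsWith(origin);
  return {
    'Page URL': url,
    'Headings': headings.join(' | '),
    'Internal Links': links.filter(isInternal).join(' | '),
    'External Links': links.filter(l => l.startsWith('http') && !isInternal(l)).join(' | '),
  };
}

pageToRow('https://example.com/services',
  '<h1>Services</h1><a href="/contact">Contact</a><a href="https://twitter.com/x">X</a>');
// → Headings: 'H1: Services', Internal Links: '/contact', External Links: 'https://twitter.com/x'
```

The point of the flat row is that Sheets filters and the chat agent can both query it without re-crawling anything.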
What Changes: Before vs. After
| What This Eliminates | Impact You’ll See |
|---|---|
| Copying headings and links by hand into docs that go stale | Structured, filterable rows in Google Sheets you can reuse |
| Re-crawling a site every time someone asks a follow-up question | The chat agent answers from stored data in seconds |
| Summarizing every important page yourself | AI-generated summaries, language, and link data for each page |
Real-World Impact
Say you’re auditing a 50-page marketing site. Manually, you might spend about 5 minutes per page to open it, note the H1/H2s, skim for the gist, and grab a couple of key links, which is roughly 4 hours of pure busywork. With this workflow, you submit the URL once, let it crawl in the background, and you’re left with a Google Sheet containing headings, links, and summaries for every page. From there, answering “show me all H1s on services pages” takes minutes, not another afternoon.
Requirements
- n8n instance (try n8n Cloud free)
- Self-hosting option if you prefer (Hostinger works well)
- Google Sheets to store crawl results and “memory”.
- OpenAI to extract headings, links, and summaries.
- OpenAI API key (get it from your OpenAI dashboard).
Skill level: Intermediate. You’ll connect accounts, paste API keys, and tweak scan options, but you won’t be writing real code unless you want to.
Need help implementing this? Talk to an automation expert (free 15-minute consultation).
The Workflow Flow
A chat message triggers the workflow. You enter a website URL in the chat interface, and the workflow first checks Google Sheets to see if that site has already been indexed.
It validates the URL and discovers the sitemap. If the site is new, an AI validator confirms the URL is usable, then an HTTP request pulls robots.txt to locate sitemap URLs. Another AI step selects the most relevant sitemap based on your scan options (for example, pages vs. posts).
The crawler processes pages in batches. Each URL is fetched with an HTTP request using a rotating user-agent, the HTML is converted to Markdown, and OpenAI analyzes the content to extract language, heading structure, internal links, external links, and a summary. Those results are appended into Google Sheets row-by-row.
Then it switches into “agent mode” for questions. Once the sheet is flagged as indexed, the LangChain-style agent reads your stored rows to answer questions like “what’s on the contact page?” It can also do live HTTP checks when you need up-to-date info.
You can easily modify which URL types get scanned (pages, posts, categories) to match your audit style. See the full implementation guide below for customization options.
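The page-type filtering described above can be sketched without the AI step. This is a hypothetical, deterministic stand-in for the Sitemap Selector (`pickSitemap` is an illustrative name, not a node in the template) showing how scan options could map to a sitemap-index entry:

```javascript
// Assumed sketch: pick the first sitemap from a sitemap index whose URL
// matches an enabled scan option; fall back to the first entry.
function pickSitemap(sitemapLocs, options) {
  const wanted = [];
  if (options.scan_pages) wanted.push('page');
  if (options.scan_posts) wanted.push('post');
  if (options.category) wanted.push('categor');
  if (options.tags) wanted.push('tag');
  return sitemapLocs.find(loc => wanted.some(w => loc.includes(w))) || sitemapLocs[0];
}

pickSitemap(
  ['https://example.com/post-sitemap.xml', 'https://example.com/page-sitemap.xml'],
  { scan_pages: true, scan_posts: false, category: false, tags: false }
);
// → 'https://example.com/page-sitemap.xml'
```

The template uses an AI step for this choice instead, which handles messier sitemap naming than simple substring matching would.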
Step-by-Step Implementation Guide
Step 1: Configure the Web Chat Trigger
Set up the inbound chat endpoint that starts the workflow and passes user input into the AI validation and conversation paths.
- Add and open Web Chat Trigger.
- Set Public to `true` and Authentication to `basicAuth`.
- Credential Required: Connect your httpBasicAuth credentials.
- Confirm that Web Chat Trigger outputs to Retrieve Data Schema as the first step in the flow.
Step 2: Connect Google Sheets
These nodes read and write the website content database used by the assistant and by the summarization process.
- Open Retrieve Data Schema and select the spreadsheet in Document ID and Sheet Name (e.g., `gid=0`).
- In Retrieve Data Schema, ensure the filter uses Lookup Column `Data schema` with Lookup Value `={{ true }}`.
- Credential Required: Connect your googleSheetsOAuth2Api credentials in Retrieve Data Schema.
- Open Schema Flag Update, set Operation to `appendOrUpdate`, and map Data schema to `={{true}}`.
- Credential Required: Connect your googleSheetsOAuth2Api credentials in Schema Flag Update.
- Open Append Sheet Row, keep Operation as `append`, and confirm column mappings use AI expressions like ``` ={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Summary_Content', ``, 'string') }} ```.
- Credential Required: Connect your googleSheetsOAuth2Api credentials in Append Sheet Row.
Step 3: Set Up URL Validation and Schema Checks
This section validates user URLs, checks whether the schema is already present, and routes into either the chat response or sitemap scanning flow.
- Open Schema Check and confirm the condition uses `={{ $json['Data schema'] }}` with a boolean true check.
- Open URL Validator Agent and set Text to `={{ $node["Web Chat Trigger"].json["chatInput"] }}`.
- Attach Structured Parser to URL Validator Agent as the output parser and keep the JSON schema example: `{ "URL": "example.com", "URL_bool": true }`
- Connect OpenAI Validator Model as the language model for URL Validator Agent.
- Credential Required: Connect your openAiApi credentials in OpenAI Validator Model (credentials are set on the model node, not the agent).
- Open URL Valid Check and confirm the condition uses `={{ $json.output.URL_bool }}` with a boolean true check.
- For invalid URLs, ensure Invalid URL Reply sends the message `Debes introducir una URL válida ejemplo: https://google.es` (“You must enter a valid URL, for example: https://google.es”).
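The validator itself is an AI agent, but its structured output is easy to approximate. This is a hedged, non-AI sketch of the `{ URL, URL_bool }` shape the Structured Parser enforces; `validateUrl` is an illustrative helper, not the workflow's actual code:

```javascript
// Illustrative (non-AI) approximation of the URL Validator Agent's output.
// The real workflow asks OpenAI; this uses the WHATWG URL parser instead.
function validateUrl(chatInput) {
  const trimmed = chatInput.trim();
  const candidate = trimmed.startsWith('http') ? trimmed : `https://${trimmed}`;
  try {
    const u = new URL(candidate);
    // Require a dot in the hostname so bare words like "hello" fail.
    const ok = u.hostname.includes('.');
    return { URL: ok ? u.hostname : chatInput, URL_bool: ok };
  } catch {
    return { URL: chatInput, URL_bool: false };
  }
}

validateUrl('example.com'); // → { URL: 'example.com', URL_bool: true }
validateUrl('hello');       // → { URL: 'hello', URL_bool: false }
```

Whatever produces it, the downstream URL Valid Check only cares that `URL_bool` is a boolean `true`.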
Note: If the validator’s output is missing `URL_bool`, URL Valid Check will not pass. Keep the structured parser schema intact.

Step 4: Configure the Conversational Agent and Memory
This agent answers chat requests using the website data in Google Sheets and a language model, with memory enabled for context.
- Open Conversational Agent and set Text to `={{ $node["Web Chat Trigger"].json["chatInput"] }}`.
- Connect OpenAI Chat Engine as the language model for Conversational Agent.
- Credential Required: Connect your openAiApi credentials in OpenAI Chat Engine.
- Attach Lightweight Memory to Conversational Agent and keep Context Window Length at `50`.
- Attach Fetch Sheet Rows as an AI tool to Conversational Agent so it can retrieve website content from the sheet.
- Credential Required: Connect your googleSheetsOAuth2Api credentials in Fetch Sheet Rows (tools use their own credentials, but are invoked by the agent).
- Confirm Conversational Agent routes to Agent Response and that Agent Response uses `={{ $json.output }}` for its message.
Step 5: Configure Sitemap Discovery and Selection
These nodes build a valid sitemap URL from robots.txt, fetch the sitemap, and select the correct sitemap section using AI.
- Open Scan Options and keep the boolean flags as configured: scan_pages `true`, scan_posts `false`, category `false`, tags `false`.
- In Random UA Picker, keep the provided JavaScript that returns a random `userAgent` string.
- Open Robots.txt Request and set URL to `={{ $node["URL Validator Agent"].json["output"]["URL"] }}/robots.txt`.
- Confirm Parse Sitemap URL parses the robots.txt content and returns `sitemapUrl` as coded.
- Open Sitemap Fetch and set URL to `={{ $json.sitemapUrl }}` with a User-Agent header of `={{ $json.userAgent }}`.
- Open XML Decode A to parse the sitemap XML and pass it to Sitemap Selector.
- Open Sitemap Selector, keep JSON Output enabled, and verify it references scan options and the sitemap index locs using expressions like `{{ $json.sitemapindex.sitemap[0].loc }}`.
- Credential Required: Connect your openAiApi credentials in Sitemap Selector.
- Open Page Sitemap Fetch and set URL to `={{ $json.message.content.sitemap_page }}`, then route to XML Decode B.
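For orientation, here are assumed sketches of what the two Code nodes in this step do. These are not the template's exact scripts; the user-agent strings and function names are illustrative:

```javascript
// "Parse Sitemap URL" equivalent: find the Sitemap: line in a robots.txt body.
function extractSitemapUrl(robotsTxt) {
  for (const line of robotsTxt.split('\n')) {
    const m = line.trim().match(/^sitemap:\s*(\S+)/i);
    if (m) return { sitemapUrl: m[1] };
  }
  return { sitemapUrl: null }; // downstream, this triggers Sitemap Failure Halt
}

// "Random UA Picker" equivalent: rotate user agents so repeated
// requests look less bot-like (example pool, not the template's).
const USER_AGENTS = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
];
function pickUserAgent() {
  return { userAgent: USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)] };
}

extractSitemapUrl('User-agent: *\nSitemap: https://example.com/sitemap.xml');
// → { sitemapUrl: 'https://example.com/sitemap.xml' }
```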
Note: If Parse Sitemap URL returns `null` for `sitemapUrl`, Sitemap Fetch will fail and trigger Sitemap Failure Halt. Ensure robots.txt contains a valid Sitemap: line.

Step 6: Configure URL Splitting, Page Requests, and Summarization
This stage expands the selected sitemap into individual URLs, fetches each page, converts HTML to Markdown, and summarizes the content with AI.
- Open Combine URL Map and keep the JavaScript that transforms `urlset.url` into a `urls` object.
- Open Split URL Items and set Field To Split Out to `urls`.
- Open Batch Iterator to control batch processing of URLs before requesting pages.
- Open Page Request and set URL to `={{ $('Split URL Items').item.json.urls }}` with a User-Agent header of `={{ $json.userAgent }}`.
- Open HTML Convert to MD and set HTML to `={{ $json.data }}`.
- Open Page Summary Model and ensure the message includes `=URL: {{ $('Split URL Items').item.json.urls }} {{ $json.data }}` in the prompt.
- Credential Required: Connect your openAiApi credentials in Page Summary Model.
- Confirm Page Summary Model uses Append Sheet Row as an AI tool to write results to the sheet.
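The Combine URL Map step can be sketched roughly like this. This is an assumed reconstruction, not the template's exact code, and the field names (`urlset.url`, `loc`, `urls`) follow what the guide above describes:

```javascript
// Hedged sketch of "Combine URL Map": flatten a parsed <urlset> into a
// urls array that "Split URL Items" can split out into one item per URL.
function combineUrlMap(parsedSitemap) {
  const entries = parsedSitemap.urlset.url;
  // The XML node may yield a single object for a one-URL sitemap, so normalize.
  const list = Array.isArray(entries) ? entries : [entries];
  return { urls: list.map(e => e.loc) };
}

combineUrlMap({
  urlset: { url: [{ loc: 'https://example.com/' }, { loc: 'https://example.com/about' }] },
});
// → { urls: ['https://example.com/', 'https://example.com/about'] }
```

Splitting on `urls` then gives the Batch Iterator one item per page, which is what keeps large crawls from overwhelming a single HTTP request.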
Step 7: Add Error Handling
The workflow includes explicit stops for invalid URLs and sitemap failures.
- Verify Request Error Halt is connected from the error output of Robots.txt Request and uses the message `URL mal introducida, debes introducir con el siguiente formato: ejemplo.com` (“URL entered incorrectly; use the following format: ejemplo.com”).
- Verify Sitemap Failure Halt is connected from the error output of Sitemap Fetch and uses the message `Sitemap no encontrado o acceso bloqueado` (“Sitemap not found or access blocked”).
Step 8: Test and Activate Your Workflow
Run a controlled test to validate the end-to-end chat response and sitemap summarization flow.
- Click Execute Workflow and send a chat message through Web Chat Trigger with a URL like `https://example.com`.
- Confirm a valid URL routes through URL Valid Check to Scan Options, Robots.txt Request, and Sitemap Fetch.
- Verify that Page Summary Model runs and Append Sheet Row adds a row with `Lang`, `Page URL`, and `Summary Content`.
- Check that Agent Response returns a chat reply when Schema Check is true and that Invalid URL Reply triggers for invalid URLs.
- Once validated, switch the workflow to Active for production use.
Watch Out For
- Google Sheets credentials can expire or need specific permissions. If things break, check the n8n credential connection and the sheet sharing settings first.
- If you’re using Wait nodes or external rendering, processing times vary. Bump up the wait duration if downstream nodes fail on empty responses.
- Default prompts in AI nodes are generic. Add your brand voice early or you’ll be editing outputs forever.
Common Questions
**How long does setup take?**

About 30 minutes if your accounts and API keys are ready.

**Can a non-developer set this up?**

Yes, but someone should be comfortable connecting Google Sheets and adding an OpenAI API key. The rest is mostly choosing scan options and testing with one small site first.

**Is it free to run?**

Yes. n8n has a free self-hosted option and a free trial on n8n Cloud. Cloud plans start at $20/month for higher volume. You’ll also need to factor in OpenAI API costs, which can climb during large crawls.

**Where should I host it?**

Two options: n8n Cloud (managed, easiest setup) or self-hosting on a VPS. For self-hosting, Hostinger VPS is affordable and handles n8n well. Self-hosting gives you unlimited executions but requires basic server management.

**How can I customize what gets crawled and extracted?**

You can change what gets crawled by adjusting the Scan Options setup and the sitemap selection behavior, so you only process pages, posts, or other URL types. Many teams also customize the AI extraction prompt used during page analysis to capture things like meta titles, CTAs, or schema snippets. If you want fresher answers later, keep the agent’s live HTTP tool enabled so it can re-check critical pages on demand.

**Why did my Google Sheets connection stop working?**

Usually it’s an expired token or the sheet permissions changed. Reconnect the Google Sheets credential in n8n, confirm the correct Google account is used, and make sure the target spreadsheet is accessible to that account. If it fails only on “append row,” check that your header columns still match what the workflow is trying to write.

**How many pages can it crawl?**

It depends on your n8n plan and your server. On self-hosted n8n there’s no execution cap, but crawl size is limited by runtime, rate limits, and OpenAI token costs, especially during the first indexing pass.

**Should I build this in n8n, Zapier, or Make?**

For crawling and analysis, n8n is usually the practical choice because you need batching, conditional logic, and a “store it in Sheets then query it later” pattern. Zapier and Make can do pieces of this, but long-running crawls and multi-step processing get awkward fast, and costs can jump when you’re iterating over many URLs. n8n also gives you self-hosting, which matters when you’re running lots of executions. Honestly, the deciding factor is complexity: simple two-app sync, Zapier is fine. For this workflow’s crawl + AI extraction + agent mode setup, n8n fits better. If you want a second opinion, Talk to an automation expert.
Once a site is indexed, the annoying part of auditing goes quiet. You keep the structured sheet, ask better questions, and move faster on the work that actually changes performance.
Need Help Setting This Up?
Our automation experts can build and customize this workflow for your specific needs. Free 15-minute consultation—no commitment required.