January 22, 2026

arXiv to Notion, with Telegram digests you trust

Lisa Granqvist Partner Workflow Automation Expert

Get a free AI assessment → ⬇️ Use template

Checking arXiv “quickly” turns into 40 open tabs, half-read PDFs, and that nagging feeling you still missed the one paper everyone will talk about tomorrow. It’s not the reading that breaks you. It’s the hunting, sorting, and saving.

Founders tracking what might shift their roadmap feel this pain first. A research lead trying to keep a small team aligned gets it too. And if you do marketing for an AI product, an arXiv-to-Notion automation is an easy way to stay sharp without living in feeds.

This workflow pulls new AI papers from arXiv, files them into Notion with clean metadata and PDF links, generates Gemini “deep summaries,” then sends a daily Telegram digest you can actually trust. You’ll see how it works, what you need, and where people usually get stuck.

How This Automation Works

The full n8n workflow, from trigger to final output:

n8n Workflow Template: arXiv to Notion, with Telegram digests you trust

Click to explore

flowchart LR

    subgraph sg0["Scheduled Daily Flow"]
        direction LR
        n0["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>Code (Parse Gemini JSON in c.."]
        n1@{ icon: "mdi:swap-vertical", form: "rounded", label: "Split Results", pos: "b", h: 48 }
        n2@{ icon: "mdi:play-circle", form: "rounded", label: "Scheduled Daily Trigger", pos: "b", h: 48 }
        n3["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/httprequest.dark.svg' width='40' height='40' /></div><br/>HTTP Request"]
        n4@{ icon: "mdi:cog", form: "rounded", label: "Format Conversor", pos: "b", h: 48 }
        n5["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>Dedupe with Static Data"]
        n6@{ icon: "mdi:swap-vertical", form: "rounded", label: "Edit Fields for Notion (incl..", pos: "b", h: 48 }
        n7["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/notion.dark.svg' width='40' height='40' /></div><br/>Register to Notion Database"]
        n8@{ icon: "mdi:swap-vertical", form: "rounded", label: "Freeze page_id", pos: "b", h: 48 }
        n9["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/notion.dark.svg' width='40' height='40' /></div><br/>Append a block (adding 'bloc.."]
        n10["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/telegram.svg' width='40' height='40' /></div><br/>Send a text message"]
        n11@{ icon: "mdi:swap-vertical", form: "rounded", label: "Clean page_id", pos: "b", h: 48 }
        n12@{ icon: "mdi:robot", form: "rounded", label: "Analyze doc (Prompt Ultra-Pro)", pos: "b", h: 48 }
        n13["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/merge.svg' width='40' height='40' /></div><br/>Merge page_id & Summary"]
        n14@{ icon: "mdi:swap-vertical", form: "rounded", label: "Debug before append", pos: "b", h: 48 }
        n15["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/notion.dark.svg' width='40' height='40' /></div><br/>Append chunks as blocks"]
        n16@{ icon: "mdi:play-circle", form: "rounded", label: "When clicking ‘Execute workf..", pos: "b", h: 48 }
        n17@{ icon: "mdi:cog", form: "rounded", label: "End Telegram branch (no furt..", pos: "b", h: 48 }
        n18@{ icon: "mdi:swap-vertical", form: "rounded", label: "Process each paper (Gemini +..", pos: "b", h: 48 }
        n19@{ icon: "mdi:cog", form: "rounded", label: "Return to paper loop (next p..", pos: "b", h: 48 }
        n20@{ icon: "mdi:cog", form: "rounded", label: "Delay between paper summaries", pos: "b", h: 48 }
        n21["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>Prepare Notion payload (JSON)"]
        n22["<div style='background:#f5f5f5;padding:10px;border-radius:8px;display:inline-block;border:1px solid #e0e0e0'><img src='https://flowpast.com/wp-content/uploads/n8n-workflow-icons/code.svg' width='40' height='40' /></div><br/>Filter recent papers (last 2.."]
        n3 --> n4
        n11 --> n12
        n11 --> n13
        n1 --> n22
        n8 --> n9
        n4 --> n1
        n14 --> n15
        n10 --> n17
        n15 --> n20
        n5 --> n21
        n13 --> n0
        n2 --> n3
        n7 --> n8
        n7 --> n10
        n20 --> n19
        n21 --> n6
        n12 --> n13
        n22 --> n5
        n19 --> n18
        n6 --> n7
        n0 --> n14
        n16 --> n3
        n9 --> n18
        n18 --> n11
    end

    %% Styling
    classDef trigger fill:#e8f5e9,stroke:#388e3c,stroke-width:2px
    classDef ai fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
    classDef aiModel fill:#e8eaf6,stroke:#3f51b5,stroke-width:2px
    classDef decision fill:#fff8e1,stroke:#f9a825,stroke-width:2px
    classDef database fill:#fce4ec,stroke:#c2185b,stroke-width:2px
    classDef api fill:#fff3e0,stroke:#e65100,stroke-width:2px
    classDef code fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    classDef disabled stroke-dasharray: 5 5,opacity: 0.5
    class n2,n16 trigger
    class n12 ai
    class n7,n9,n15 database
    class n3 api
    class n0,n5,n21,n22 code
    classDef customIcon fill:none,stroke:none
    class n0,n3,n5,n7,n9,n10,n13,n15,n21,n22 customIcon

The Problem: arXiv overload turns into missed papers

arXiv is an incredible resource, but it’s also a firehose. You open the AI feed intending to scan a handful of abstracts, and suddenly it’s a mini project: filter out duplicates, figure out what’s new since yesterday, click into PDFs, then save “the good ones” somewhere you will remember later. Even if you have a Notion database, getting papers in there usually means copy-pasting titles, IDs, authors, and links (and inevitably messing up one of them). The cost isn’t just time. It’s context switching, and it makes “keeping up” feel heavier than it should.

It adds up fast. Here’s where it breaks down in real life.

You keep re-finding the same papers because there’s no reliable dedupe across days.
Saving papers to Notion is manual, which means metadata gets inconsistent and searching later becomes annoying.
Even when you grab the PDFs, summarizing them takes long enough that you postpone it, then forget why you saved them.
Your “daily check” depends on willpower, so busy mornings quietly erase your research habit.

The Solution: arXiv → Notion pages + Gemini summaries + Telegram digest

This n8n workflow turns arXiv into a daily research assistant that runs at 08:00. It pulls the latest Artificial Intelligence papers from arXiv’s API, converts the feed into clean JSON, then filters to a recent time window so you’re not reprocessing yesterday’s list. Next, it removes duplicate records and builds a structured Notion page for each paper, including the core metadata and a direct PDF URL. After the Notion page exists, Gemini reads the PDF and produces a “deep research” summary in chunks, which are appended back into that same Notion page as readable blocks. Finally, the workflow posts a Telegram update with the title, a short abstract, and links to both the PDF and your Notion entry.

The workflow starts on a schedule (or manually when you’re testing). arXiv data gets cleaned, filtered, and deduped before anything is saved. Then Notion becomes your system of record, and Telegram becomes your daily reminder to actually look.

What You Get: Automation vs. Results

What This Workflow Automates

Results You’ll Get

Fetches new arXiv AI papers automatically every morning.
Converts XML feed entries into usable fields and cleans the output.
Filters by recency and removes duplicates before saving anything.
Creates one Notion page per paper, then appends Gemini summary blocks.

Save about 4–6 manual steps per paper (no more copy-paste routines).
A searchable Notion research hub with consistent titles, authors, dates, and PDF links.
Daily Telegram digest that nudges you to read, not scroll.
Summaries show up inside the same Notion page, so the “why it matters” isn’t lost.
Less re-reading and re-finding, because your archive stays clean.

Example: What This Looks Like

Say your team wants to track 10 new AI papers a day. Manually, it’s maybe 6 minutes per paper to open arXiv, grab the PDF link, copy the title/authors, paste into Notion, and write even a rough 3–4 sentence takeaway. That’s about an hour, every weekday. With this workflow, you spend a couple minutes skimming the Telegram digest, then open only the 1–2 Notion pages that look relevant. The rest is already filed and summarized for you.

What You’ll Need

n8n instance (try n8n Cloud free)
Self-hosting option if you prefer (Hostinger works well)
Notion for your paper database and summaries.
Telegram to post a daily digest to a channel.
Google Gemini API key (get it from Google AI Studio / Google Cloud console).

Skill level: Intermediate. You’ll mostly map fields and add credentials, plus light tweaking if your Notion schema is custom.

Don’t want to set this up yourself? Talk to an automation expert (free 15-minute consultation).

How It Works

A daily scheduled trigger starts the run. The workflow is set to fire at 08:00, and there’s also a manual trigger for testing so you’re not waiting until tomorrow morning.

arXiv data is fetched and cleaned. n8n pulls the AI feed via HTTP Request, converts the XML into JSON, and splits entries into individual paper items that are easier to process.

Recent-only filtering and deduping keeps things sane. Code nodes apply a time window and remove duplicates, so your Notion database doesn’t slowly fill with repeats or stale results.

Notion becomes the archive, then Gemini fills in the insight. For each paper, the workflow creates a Notion page, inserts a “summary” heading, asks Gemini to produce a deep summary from the PDF, parses the summary into chunks, and appends them as rich-text blocks. A Wait node pauses between papers so you don’t overwhelm downstream calls.

You can easily modify the arXiv query to focus on specific topics (like agents, retrieval, or diffusion) based on your needs. See the full implementation guide below for customization options.

Step-by-Step Implementation Guide

Step 1: Configure the Trigger Type

Set up both manual and scheduled triggers so you can run on-demand or daily.

Open Manual Execution Trigger to enable manual runs (no configuration required).
Open Scheduled Automation Start and confirm the rule's Trigger at Hour is set to 8, so the workflow runs daily at 08:00.
Verify both triggers connect to External API Request so either trigger can start the workflow.

Use Manual Execution Trigger for initial testing before enabling Scheduled Automation Start in production.

Step 2: Connect the Research Feed Source

Configure the arXiv API request and convert XML to JSON for downstream parsing.

Open External API Request and set URL to https://export.arxiv.org/api/query.
Under Query Parameters, set search_query to abs:"artificial intelligence", sortBy to submittedDate, sortOrder to descending, start to 0, and max_results to 100.
Open XML to JSON Mapper and enable Merge Attributes so XML attributes are merged into their parent element as regular fields.
Open Split Feed Entries and set Field to Split Out to feed.entry.

Step 3: Filter and Deduplicate Papers

Keep only recent entries and remove duplicates across runs.

Open Filter Recent Papers and confirm the time window is set by const HOURS = 24; in the JS code (raise it to 48 or 72 if you want a wider range).
Review the output fields built in Filter Recent Papers (e.g., title, summary, authors, published).
Open Remove Duplicate Records and keep $getWorkflowStaticData('global') for persistent deduplication.

⚠️ Common Pitfall: The dedupe logic requires the Remove Duplicate Records node to run in “Run Once for All Items” mode. Ensure this is enabled to avoid missing duplicates.

Step 4: Set Up Notion Payload and Field Mapping

Build the Notion-ready payload and map fields before creating records.

Open Compose Notion Payload and replace TU_DATABASE_ID with your Notion database ID.
Confirm Map Fields for Notion assignments such as title set to {{ $json.properties.title.title[0].text.content }} and published set to {{ $json.properties.published.date.start }}.
Keep the abstract_clean cleaning expression exactly as configured: {{ String($json.properties.abstract.rich_text[0].text.content || $json.summary).replace(/\\\\n/g, ' ').replace(/\\n/g, ' ').replace(/\n/g, ' ').replace(/\s+/g, ' ').trim() }}.
Confirm url_pdf builds the PDF URL with the expression {{ (() => { const src = ($json.link || $json.properties.arxiv_id.rich_text[0].text.content || '').trim(); if (!src) return ''; let u = src.replace('/abs/', '/pdf/'); if (!/^https?:\/\//i.test(u)) u = 'https://' + u.replace(/^\/+/, ''); if (!u.toLowerCase().endsWith('.pdf')) u += '.pdf'; return u;})() }}.

Credential Required: Connect your Notion credentials for Create Notion Record, Insert Summary Heading, and Append Summary Blocks. These nodes have no credentials configured yet.

Step 5: Configure Notion Record Creation and Iteration

Create database pages, capture page IDs, and prepare the loop for per-paper processing.

Open Create Notion Record and set Title to {{ $json.title }} with Resource set to databasePage.
In Create Notion Record properties, confirm each field uses the mapped expressions, such as {{ $json.abstract_clean }} for the abstract.
Open Store Page Identifier and keep Include Other Fields enabled, mapping =page_id to {{ $json.id }}.
Open Insert Summary Heading and confirm the block type is heading_2 with text Deep Research Summary.
Open Iterate Paper Processing to ensure batch processing is enabled for looped summary insertion.

Create Notion Record outputs to both Store Page Identifier and Dispatch Telegram Update in parallel, so both Notion and Telegram flows proceed simultaneously.

Step 6: Configure AI Summarization and Notion Block Appending

Generate deep summaries, parse them into chunks, and append to the Notion page while pacing requests.

Open Normalize Page Identifier and keep the expression {{ $json.page_id || $json.parent?.page_id || $json.page_id || $json.results?.[0]?.parent?.page_id || $json.id }} to standardize page IDs.
In Generate Deep Summary, confirm Resource is document and Document URLs is {{ $('Map Fields for Notion').item.json.url_pdf }}.
Credential Required: Connect your googlePalmApi credentials in Generate Deep Summary and verify the model is models/gemini-2.5-pro.
Confirm Normalize Page Identifier outputs to both Generate Deep Summary and Combine Page and Summary in parallel, and that Combine Page and Summary uses Mode combine with Combine By combineByPosition.
Open Parse Summary Chunks and leave the JSON parsing code intact to extract chunks into slice items.
Open Prepare Chunk Debug and confirm mappings like {{ $json.slice.length }} for dbg_slice_len.
Open Append Summary Blocks and set block Text Content to {{ $json.slice || '—' }} with Resource block.
Open Pause Between Summaries and set Amount to 0.4 to throttle Notion updates.
Ensure Loop Back to Next Paper connects to Iterate Paper Processing to continue the loop.

Step 7: Configure Telegram Notifications

Send a Telegram update when each Notion page is created.

Open Dispatch Telegram Update and set Chat ID to your target value, e.g., [YOUR_ID].
Keep the message template as configured, including expressions like {{ $('Create Notion Record').item.json.property_title }} and {{ $('Map Fields for Notion').item.json.abstract_clean }}.
Credential Required: Connect your telegramApi credentials in Dispatch Telegram Update.
Confirm Dispatch Telegram Update outputs to End Telegram Path.

Dispatch Telegram Update is triggered in parallel with Store Page Identifier after Create Notion Record, so notifications are sent immediately after page creation.

Step 8: Test and Activate Your Workflow

Run a manual test, verify outputs in Notion and Telegram, then activate the schedule.

Click Execute Workflow using Manual Execution Trigger and watch data pass through External API Request, XML to JSON Mapper, and Split Feed Entries.
Confirm new pages appear in Notion from Create Notion Record and that blocks are appended by Append Summary Blocks.
Verify Telegram messages arrive from Dispatch Telegram Update with valid links and summary text.
Once successful, activate the workflow and rely on Scheduled Automation Start for daily execution.

⚠️ Common Pitfall: If Notion pages or blocks fail to create, re-check that Notion credentials are connected to Create Notion Record, Insert Summary Heading, and Append Summary Blocks.

🔒

Unlock Full Step-by-Step Guide

Get the complete implementation guide + downloadable template

Common Gotchas

Notion credentials can expire or need specific permissions. If things break, check your Notion integration connection inside n8n and confirm the database is shared with that integration.
If you’re using Wait nodes or external rendering, processing times vary. Bump up the wait duration if downstream nodes fail on empty responses.
Gemini API limits and PDF accessibility can cause flaky summaries. If the PDF link is blocked or too slow, the “Generate Deep Summary” step may return partial output, so review the run history for that node first.

Frequently Asked Questions

How long does it take to set up this arXiv Notion automation?

Plan on about 45 minutes if your Notion database and Telegram bot are ready.

Do I need coding skills to set up this arXiv Notion automation?

No. You’ll connect accounts, paste in API keys, and map a few Notion properties.

Is n8n free to use for this arXiv Notion automation workflow?

Yes. n8n has a free self-hosted option and a free trial on n8n Cloud. Cloud plans start at $20/month for higher volume. You’ll also need to factor in Gemini API usage, which depends on how many PDFs you summarize per day.

Where can I host n8n to run this automation?

Two options: n8n Cloud (managed, easiest setup) or self-hosting on a VPS. For self-hosting, Hostinger VPS is affordable and handles n8n well. Self-hosting gives you unlimited executions but requires basic server management.

Can I customize this arXiv Notion automation workflow for a different arXiv category or keyword search?

Yes, and it’s one of the best tweaks to make. Update the arXiv query in the “External API Request” HTTP Request node to target your preferred categories or search terms. Common customizations include narrowing to subtopics (like agents or RAG), increasing max_results on busy days, and changing the time-window logic in the “Filter Recent Papers” code node so you catch weekend releases.

Why is my Notion connection failing in this workflow?

Usually it’s permissions, not n8n. Make sure the Notion database is shared with your Notion integration, then re-check the credentials in n8n and re-select the database in the “Create Notion Record” node. If it fails only sometimes, look at the property mapping in “Map Fields for Notion,” because a mismatched select/tag value can cause the create call to error out.

How many papers can this arXiv Notion automation handle?

Practically, it handles “as many as you’re willing to summarize,” because the bottleneck is the PDF summarization step. On n8n Cloud Starter you’re limited by monthly executions, while self-hosting has no execution limit (your server and API quotas matter more). If you expect 50+ papers a day, add a stricter filter (keywords, categories, or max_results) and keep the Wait node so you don’t spike API errors. Most people start with 10–20 papers daily and adjust from there. Honestly, you want fewer, better papers anyway.

Is this arXiv Notion automation better than using Zapier or Make?

For this workflow, n8n has a few advantages: more complex logic with unlimited branching at no extra cost, a self-hosting option for unlimited executions, and native code/looping support for chunked summaries that Zapier tends to make awkward. Zapier or Make can work if you only want “RSS in, message out,” but the Notion + PDF summarization + append-in-chunks part is where you’ll feel the limits. If you want to tune deduping, time windows, or the summary format, n8n stays flexible. Talk to an automation expert if you’re not sure which fits.

Once this is running, your research “system” stops depending on motivation. The workflow does the collecting and summarizing, and you just show up for the few papers worth your attention.

Need Help Setting This Up?

Our automation experts can build and customize this workflow for your specific needs. Free 15-minute consultation—no commitment required.

Lisa Granqvist

Workflow Automation Expert

Expert in workflow automation and no-code tools.

{
"id": null,
"name": "Automated Research Summaries Pipeline",
"versionId": null,
"meta": {
"instanceId": "dd9fa2e064917d0c2b49dfa52f7f04fad05684bce0710604c065adf54a062603",
"templateId": null,
"templateCredsSetupCompleted": null
},
"nodes": [
{
"id": "flowpast-topbar-10519",
"name": "Flowpast Branding",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
20
],
"parameters": {
"color": 7,
"width": 1440,
"height": 80,
"content": "## Flowpast.com | Automation Workflow Library\n**\ud83d\udcd6 Full tutorial & setup guide:** flowpast.com"
},
"typeVersion": 1
},
{
"id": "f3f8e985-9250-499f-a0e3-8b41bb94b55f",
"name": "Manual Execution Trigger",
"type": "n8n-nodes-base.manualTrigger",
"position": [
1335,
160
],
"parameters": [],
"typeVersion": 1
},
{
"id": "95d19cfc-f7d9-4326-bda7-2deaebaedfdd",
"name": "Scheduled Automation Start",
"type": "n8n-nodes-base.scheduleTrigger",
"position": [
1340,
195
],
"parameters": {
"rule": {
"interval": [
{
"triggerAtHour": 8
}
]
}
},
"typeVersion": 1.2
},
{
"id": "befe2fc3-f00e-4157-a4ba-fbdbb4a003dc",
"name": "External API Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
955,
250
],
"parameters": {
"url": "https://export.arxiv.org/api/query",
"options": {
"response": {
"response": {
"responseFormat": "text"
}
},
"allowUnauthorizedCerts": true
},
"sendQuery": true,
"queryParameters": {
"parameters": [
{
"name": "search_query",
"value": "abs:\"artificial intelligence\""
},
{
"name": "sortBy",
"value": "submittedDate"
},
{
"name": "sortOrder",
"value": "descending"
},
{
"name": "start",
"value": "0"
},
{
"name": "max_results",
"value": "100"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "81ea7bd5-edac-43f7-9736-3f5fed6678cc",
"name": "XML to JSON Mapper",
"type": "n8n-nodes-base.xml",
"position": [
710,
210
],
"parameters": {
"options": {
"mergeAttrs": true
}
},
"typeVersion": 1
},
{
"id": "fedd41d3-62ac-4685-a66f-8c0e5a42a329",
"name": "Split Feed Entries",
"type": "n8n-nodes-base.splitOut",
"position": [
325,
255
],
"parameters": {
"options": [],
"fieldToSplitOut": "feed.entry"
},
"typeVersion": 1
},
{
"id": "34727c62-2923-4868-82c1-037fb449615f",
"name": "Filter Recent Papers",
"type": "n8n-nodes-base.code",
"position": [
75,
200
],
"parameters": {
"jsCode": "// === Configuraci\u00f3n ===\nconst HOURS = 24; // Cambia a 48 o 72 si quieres ampliar el rango\nconst now = new Date();\nconst threshold = new Date(now.getTime() - HOURS * 60 * 60 * 1000);\n\n// === Utilidad para limpiar texto ===\nconst get = v => Array.isArray(v) ? String(v[0] ?? '').trim() : String(v ?? '').trim();\n\n// === Procesar cada paper ===\nconst out = [];\n\nfor (const it of items) {\n  const e = it.json;\n\n  // Convertimos fechas\n  const published = new Date(get(e.published || e.updated));\n  if (isNaN(published) || published < threshold) continue; // filtra los m\u00e1s antiguos\n\n  // --- Autores ---\n  const authorList = Array.isArray(e.author) ? e.author : [e.author];\n  const authors = authorList\n    .filter(Boolean)\n    .map(a => get(a.name))\n    .join(', ');\n\n  // --- Categor\u00edas ---\n  const categoryList = Array.isArray(e.category) ? e.category : [e.category];\n  const categories = categoryList\n    .filter(Boolean)\n    .map(c => c?.['@_term'] || c?.['$'] || '')\n    .filter(Boolean)\n    .join(', ');\n\n  // --- A\u00f1adir al resultado ---\n  out.push({\n    json: {\n      arxiv_id: get(e.id),\n      title: get(e.title),\n      summary: get(e.summary),\n      authors,\n      categories,\n      published: published.toISOString(),\n      link: get(e.id)\n    }\n  });\n}\n\nreturn out;\n"
},
"typeVersion": 2
},
{
"id": "c4c53a6d-cfae-4817-b077-5c82f8381fff",
"name": "Remove Duplicate Records",
"type": "n8n-nodes-base.code",
"position": [
0,
405
],
"parameters": {
"jsCode": "// === DEDUPLICADOR PERSISTENTE ===\n// Guarda IDs ya vistos entre ejecuciones (funciona en \"Code\" con modo Run Once for All Items)\n\nconst store = $getWorkflowStaticData('global'); // \u2705 Correcta sintaxis en n8n actual\nif (!store.seen) store.seen = {}; // mapa id -> fecha\nconst seen = store.seen;\n\n// Dedupe dentro de la misma ejecuci\u00f3n\nconst batchSeen = new Set();\nconst out = [];\n\nfor (const it of items) {\n  const id = it.json.arxiv_id || it.json.link || it.json.id;\n  if (!id) continue;\n\n  if (batchSeen.has(id)) continue;  // ya apareci\u00f3 en esta ejecuci\u00f3n\n  batchSeen.add(id);\n\n  if (seen[id]) continue;           // ya apareci\u00f3 en ejecuciones previas\n\n  // marcar como nuevo\n  seen[id] = new Date().toISOString();\n  out.push(it);\n}\n\n// Limpiar registros viejos si excede 2000 entradas\nconst MAX = 2000;\nconst entries = Object.entries(seen)\n  .sort((a, b) => new Date(b[1]) - new Date(a[1]));\nif (entries.length > MAX) {\n  const keep = new Set(entries.slice(0, MAX).map(([k]) => k));\n  for (const k of Object.keys(seen)) {\n    if (!keep.has(k)) delete seen[k];\n  }\n}\n\nreturn out;\n"
},
"typeVersion": 2
},
{
"id": "9a6e800c-1fd2-45c3-ac36-2cfb95c259bc",
"name": "Compose Notion Payload",
"type": "n8n-nodes-base.code",
"position": [
385,
350
],
"parameters": {
"jsCode": "return items.map(item => ({\n  json: {\n    parent: { database_id: \"TU_DATABASE_ID\" },\n    properties: {\n      title: {\n        title: [{ text: { content: item.json.title || \"Sin t\u00edtulo\" } }]\n      },\n      arxiv_id: {\n        rich_text: [{ text: { content: item.json.arxiv_id || \"\" } }]\n      },\n      abstract: {\n        rich_text: [{ text: { content: item.json.summary || \"\" } }]\n      },\n      authors: {\n        rich_text: [{ text: { content: item.json.authors || \"\" } }]\n      },\n      categories: {\n        rich_text: [{ text: { content: item.json.categories || \"\" } }]\n      },\n      published: {\n        date: { start: item.json.published || null }\n      },\n      link: {\n        url: item.json.link || null\n      }\n    }\n  }\n}));\n"
},
"typeVersion": 2
},
{
"id": "d66eb078-88f1-4de0-b3ae-95ad34398c27",
"name": "Map Fields for Notion",
"type": "n8n-nodes-base.set",
"position": [
630,
400
],
"parameters": {
"options": [],
"assignments": {
"assignments": [
{
"id": "4f50a382-fd77-425b-b583-5140078f1d15",
"name": "title",
"type": "string",
"value": "={{ $json.properties.title.title[0].text.content }}"
},
{
"id": "943afbe5-13a8-46e8-8904-6c9139e741a5",
"name": "published",
"type": "string",
"value": "={{ $json.properties.published.date.start }}"
},
{
"id": "2476f04e-1db7-41aa-9388-bc9d692883bf",
"name": "arxiv_id",
"type": "string",
"value": "={{ $json.properties.arxiv_id.rich_text[0].text.content }}"
},
{
"id": "ca36a055-966b-4b29-a164-cfee360fc999",
"name": "authors",
"type": "string",
"value": "={{ $json.properties.authors.rich_text[0].text.content }}"
},
{
"id": "e820d5fd-d900-4b00-82d3-773a9eafe6a6",
"name": "abstract",
"type": "string",
"value": "={{ $json.properties.abstract.rich_text[0].text.content }}"
},
{
"id": "1e5e2d1b-ae9b-4a7a-b6c3-aa4b80751a61",
"name": "abstract_clean",
"type": "string",
"value": "={{ \n  String($json.properties.abstract.rich_text[0].text.content || $json.summary)\n    .replace(/\\\\\\\\n/g, ' ')  // limpia \"\\n\" escapado\n    .replace(/\\\\n/g, ' ')    // limpia \"\\n\" si llega doble escapado\n    .replace(/\\n/g, ' ')     // limpia saltos reales\n    .replace(/\\s+/g, ' ')    // colapsa espacios m\u00faltiples\n    .trim() \n}}"
},
{
"id": "eece341f-e5cc-4d2f-8c0b-66796d7e5c45",
"name": "url_pdf",
"type": "string",
"value": "={{ (() => {\n     const src = ($json.link || $json.properties.arxiv_id.rich_text[0].text.content || '').trim();\n     if (!src) return '';\n     // Cambia /abs/ por /pdf/\n     let u = src.replace('/abs/', '/pdf/');\n     // Asegura protocolo\n     if (!/^https?:\\/\\//i.test(u)) u = 'https://' + u.replace(/^\\/+/, '');\n     // A\u00f1ade .pdf si no lo tiene\n     if (!u.toLowerCase().endsWith('.pdf')) u += '.pdf';\n     return u;\n})() }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "df174b58-d4a9-4bcf-b35d-ef564521408d",
"name": "Create Notion Record",
"type": "n8n-nodes-base.notion",
"position": [
1015,
365
],
"parameters": {
"title": "={{ $json.title }}",
"options": [],
"resource": "databasePage",
"databaseId": {
"__rl": true,
"mode": "list",
"value": "[YOUR_ID]",
"cachedResultUrl": "https://www.notion.so/[YOUR_ID]",
"cachedResultName": "ARXIV"
},
"propertiesUi": {
"propertyValues": [
{
"key": "arxiv_id|url",
"urlValue": "={{ $json.arxiv_id }}"
},
{
"key": "authors|rich_text",
"textContent": "={{ $json.authors }}"
},
{
"key": "published|date",
"date": "={{ $json.published }}",
"timezone": "US/Eastern"
},
{
"key": "abstract|rich_text",
"textContent": "={{ $json.abstract_clean }}"
},
{
"key": "url_pdf|url",
"urlValue": "={{ $json.url_pdf }}"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "2e7d8869-f34c-4ba8-a1f3-4e4721cf6916",
"name": "Store Page Identifier",
"type": "n8n-nodes-base.set",
"position": [
1265,
415
],
"parameters": {
"options": [],
"assignments": {
"assignments": [
{
"id": "2b4c8c3c-d029-4d03-a74b-85c361e7c0ec",
"name": "=page_id",
"type": "string",
"value": "={{ $json.id }}"
}
]
},
"includeOtherFields": true
},
"typeVersion": 3.4
},
{
"id": "78861c59-f436-4959-9f77-2c919adf512d",
"name": "Insert Summary Heading",
"type": "n8n-nodes-base.notion",
"position": [
1330,
515
],
"parameters": {
"blockId": {
"__rl": true,
"mode": "id",
"value": "={{ $json.id }}"
},
"blockUi": {
"blockValues": [
{
"type": "heading_2",
"textContent": "Deep Research Summary"
}
]
},
"resource": "block"
},
"typeVersion": 2.2
},
{
"id": "84efcd13-b37e-47fd-95f1-385181986bb3",
"name": "Iterate Paper Processing",
"type": "n8n-nodes-base.splitInBatches",
"position": [
960,
570
],
"parameters": {
"options": []
},
"typeVersion": 3
},
{
"id": "ab41032c-3654-4ea5-83b3-3a690a597ed3",
"name": "Normalize Page Identifier",
"type": "n8n-nodes-base.set",
"position": [
715,
520
],
"parameters": {
"options": [],
"assignments": {
"assignments": [
{
"id": "d928dfb2-65a2-449e-8297-d458471a5075",
"name": "=page_id",
"type": "string",
"value": "={{$json.page_id || $json.parent?.page_id || $json.page_id || $json.results?.[0]?.parent?.page_id || $json.id}}"
}
]
},
"includeOtherFields": true
},
"typeVersion": 3.4
},
{
"id": "276f2649-c421-4913-86e0-3789d7187a24",
"name": "Generate Deep Summary",
"type": "@n8n/n8n-nodes-langchain.googleGemini",
"maxTries": 2,
"position": [
315,
565
],
"parameters": {
"text": "You are an elite AI research analyst and science communicator.\n\nYour task:\nDeeply read and understand the attached research PDF and generate a structured, high-value explainer that will be stored inside a Notion child page.\n\nAudience:\n- Curious, intelligent founders and professionals\n- Not technical experts, but can understand advanced concepts when clearly explained\n\nPrimary mission:\nExplain the paper\u2019s ideas, meaning, and significance \u2014 NOT the metadata.\nDo NOT repeat title, authors, arXiv ID, year, or publication info. We already store those separately.\n\nTone:\n- Clear, sharp, human, insightful\n- Like Karpathy + Paul Graham + Ali Abdaal\n- No fluff, no hype, no academic filler\n- Metaphors and intuitive explanations welcome\n- Convey meaning and understanding, not jargon\n\nCONTENT YOU MUST PRODUCE (as narrative with brief bold section headers inside text):\n\n1) 3\u20135 core insights (executive brief)\n2) Core idea and motivation \u2014 explained simply\n3) Why this research matters now (context & importance)\n4) Key innovations & contributions\n5) Method \u2014 explained step-by-step in plain language\n6) Math / theory intuition (no formulas \u2014 explain what they *mean*)\n7) Experiments and evaluation \u2014 what was tested and why it matters\n8) Key results & what they prove (plain English significance)\n9) Limitations / where it may fail\n10) Real-world impact and applications\n11) Future work / open questions\n12) Closing takeaway \u2014 2-3 sentences summarizing the big picture\n\nOutput rules:\n- DO NOT repeat metadata (title, authors, arXiv ID, etc.)\n- DO NOT output anything outside JSON\n- NO chunk > 1900 characters\n- NO markdown blocks, no backticks inside chunks\n- Make the text feel like a human expert teaching\n\nReturn only valid JSON in this format:\n\n{\n  \"chunks\": [\n    \"chunk1 <= 1900 chars\",\n    \"chunk2 <= 1900 chars\",\n    \"...if needed\"\n  ]\n}\n\nBegin your analysis now.\n",
"modelId": {
"__rl": true,
"mode": "list",
"value": "models/gemini-2.5-pro",
"cachedResultName": "models/gemini-2.5-pro"
},
"options": [],
"resource": "document",
"documentUrls": "={{ $('Map Fields for Notion').item.json.url_pdf }}"
},
"credentials": {
"googlePalmApi": {
"id": "credential-id",
"name": ""
}
},
"retryOnFail": true,
"typeVersion": 1
},
{
"id": "86715411-218c-4910-99de-159a0270c1b1",
"name": "Combine Page and Summary",
"type": "n8n-nodes-base.merge",
"position": [
60,
505
],
"parameters": {
"mode": "combine",
"options": [],
"combineBy": "combineByPosition"
},
"typeVersion": 3.2
},
{
"id": "65ba3fb9-7d0d-4a66-b658-5d549366f188",
"name": "Parse Summary Chunks",
"type": "n8n-nodes-base.code",
"position": [
5,
720
],
"parameters": {
"jsCode": "// Toma todos los items del Merge\nconst items = $input.all();\n\n// 1) Localiza page_id y el texto de Gemini\nconst pageId =\n  items.find(it => it.json?.page_id)?.json.page_id ?? null;\n\nconst rawText =\n  items.find(it => it.json?.content)?.json?.content?.parts?.[0]?.text ?? \"\";\n\n// 2) Limpia fences ```json ... ``` y espacios\nconst cleaned = String(rawText)\n  .replace(/```json/gi, \"\")\n  .replace(/```/g, \"\")\n  .trim();\n\n// 3) Intenta parsear JSON. Si falla, deja objeto vac\u00edo.\nlet parsed = {};\ntry {\n  parsed = cleaned ? JSON.parse(cleaned) : {};\n} catch (_) {\n  parsed = {};\n}\n\n// 4) Extrae el array de trozos con tolerancia de nombres\nconst chunks =\n  Array.isArray(parsed.chunks) ? parsed.chunks :\n  Array.isArray(parsed.summary_chunks) ? parsed.summary_chunks :\n  Array.isArray(parsed.slices) ? parsed.slices :\n  [];\n\n// 5) Si no hay page_id o no hay chunks, no emites nada \u00fatil\nif (!pageId || !Array.isArray(chunks) || chunks.length === 0) {\n  // Opcional: emite un item de depuraci\u00f3n para ver qu\u00e9 lleg\u00f3\n  return [{\n    json: {\n      __parse_debug: true,\n      page_id_present: !!pageId,\n      got_chunks: Array.isArray(chunks) ? chunks.length : -1,\n      preview: cleaned.slice(0, 280)\n    }\n  }];\n}\n\n// 6) Emite un item por chunk: { page_id, slice, chunk_index, slice_index }\nreturn chunks.map((chunk, i) => ({\n  json: {\n    page_id: pageId,\n    slice: String(chunk).trim(),\n    chunk_index: i,\n    slice_index: 0,\n  }\n}));\n"
},
"typeVersion": 2
},
{
"id": "89127dd3-5486-430c-9677-e44f36269460",
"name": "Prepare Chunk Debug",
"type": "n8n-nodes-base.set",
"position": [
390,
675
],
"parameters": {
"options": [],
"assignments": {
"assignments": [
{
"id": "a1465705-0160-46c2-8d10-50beede0eb11",
"name": "page_id",
"type": "string",
"value": "={{$json.db_page_id || $json.page_id}}"
},
{
"id": "9c3f2c3c-1139-4919-a45b-46db0f62659d",
"name": "slice",
"type": "string",
"value": "={{$json.slice}}"
},
{
"id": "f9db996a-ae16-4f8c-89ac-02f7bcc9dcee",
"name": "=dbg_page_id",
"type": "string",
"value": "={{$json.page_id}}"
},
{
"id": "ab9d751f-bd50-45d5-a560-65a8b8650ff1",
"name": "dbg_slice_len",
"type": "string",
"value": "={{$json.slice.length}}"
},
{
"id": "50a5acfd-0878-4340-8ba6-c996b5132dc6",
"name": "dbg_chunk",
"type": "string",
"value": "={{$json.chunk_index}}"
},
{
"id": "0c89088e-fd2b-4286-a1c3-f737efde3ae0",
"name": "dbg_slice_index",
"type": "string",
"value": "={{$json.slice_index}}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "f12f15de-1c28-431b-8102-a54da0541b80",
"name": "Append Summary Blocks",
"type": "n8n-nodes-base.notion",
"onError": "continueRegularOutput",
"position": [
625,
730
],
"parameters": {
"blockId": {
"__rl": true,
"mode": "id",
"value": "={{$json.page_id}}"
},
"blockUi": {
"blockValues": [
{
"textContent": "={{$json.slice || '\u2014'}}\n"
}
]
},
"resource": "block"
},
"typeVersion": 2.2
},
{
"id": "1abceb48-d007-4eaf-83ab-8234ecd6db56",
"name": "Pause Between Summaries",
"type": "n8n-nodes-base.wait",
"position": [
1025,
680
],
"webhookId": "aba66457-b7f3-482e-807d-4e1a070fc829",
"parameters": {
"amount": 0.4
},
"typeVersion": 1.1
},
{
"id": "88d53b3c-e277-4cc9-91f9-bc083e41314c",
"name": "Loop Back to Next Paper",
"type": "n8n-nodes-base.noOp",
"position": [
1280,
735
],
"parameters": [],
"typeVersion": 1
},
{
"id": "d20e2281-2f80-4ac9-ab46-fa170379c4f8",
"name": "Dispatch Telegram Update",
"type": "n8n-nodes-base.telegram",
"position": [
1075,
120
],
"webhookId": "6208ba48-e8d8-42f7-90b5-c119d5fdbf50",
"parameters": {
"text": "=<b>{{ $('Create Notion Record').item.json.property_title }}</b>\n\n<b>{{ $('Create Notion Record').item.json.property_published.start }}</b>\n\n<b>{{ $('Create Notion Record').item.json.property_authors }}</b>\n\n<em>ABSTRACT (short):</em>\n{{ $('Map Fields for Notion').item.json.abstract_clean }}\n\n<b>Links</b>\n\u2022 arXiv: {{ $json.url || $json.arxiv_id || '' }}\n\u2022 Complete paper (pdf): {{ $json.property_url_pdf || '' }}\n\u2022 Deep Summary (Notion): {{ $('Create Notion Record').item.json.url }}\n",
"chatId": "[YOUR_ID]",
"additionalFields": {
"parse_mode": "=HTML",
"appendAttribution": false
}
},
"credentials": {
"telegramApi": {
"id": "credential-id",
"name": ""
}
},
"typeVersion": 1.2
},
{
"id": "d80aa1ea-5332-46bb-a03c-89a5655039aa",
"name": "End Telegram Path",
"type": "n8n-nodes-base.noOp",
"position": [
940,
165
],
"parameters": [],
"typeVersion": 1
}
],
"pinData": [],
"connections": {
"External API Request": {
"main": [
[
{
"node": "XML to JSON Mapper",
"type": "main",
"index": 0
}
]
]
},
"Normalize Page Identifier": {
"main": [
[
{
"node": "Generate Deep Summary",
"type": "main",
"index": 0
},
{
"node": "Combine Page and Summary",
"type": "main",
"index": 0
}
]
]
},
"Split Feed Entries": {
"main": [
[
{
"node": "Filter Recent Papers",
"type": "main",
"index": 0
}
]
]
},
"Store Page Identifier": {
"main": [
[
{
"node": "Insert Summary Heading",
"type": "main",
"index": 0
}
]
]
},
"XML to JSON Mapper": {
"main": [
[
{
"node": "Split Feed Entries",
"type": "main",
"index": 0
}
]
]
},
"Prepare Chunk Debug": {
"main": [
[
{
"node": "Append Summary Blocks",
"type": "main",
"index": 0
}
]
]
},
"Dispatch Telegram Update": {
"main": [
[
{
"node": "End Telegram Path",
"type": "main",
"index": 0
}
]
]
},
"Append Summary Blocks": {
"main": [
[
{
"node": "Pause Between Summaries",
"type": "main",
"index": 0
}
]
]
},
"Remove Duplicate Records": {
"main": [
[
{
"node": "Compose Notion Payload",
"type": "main",
"index": 0
}
]
]
},
"Combine Page and Summary": {
"main": [
[
{
"node": "Parse Summary Chunks",
"type": "main",
"index": 0
}
]
]
},
"Scheduled Automation Start": {
"main": [
[
{
"node": "External API Request",
"type": "main",
"index": 0
}
]
]
},
"Create Notion Record": {
"main": [
[
{
"node": "Store Page Identifier",
"type": "main",
"index": 0
},
{
"node": "Dispatch Telegram Update",
"type": "main",
"index": 0
}
]
]
},
"Pause Between Summaries": {
"main": [
[
{
"node": "Loop Back to Next Paper",
"type": "main",
"index": 0
}
]
]
},
"Compose Notion Payload": {
"main": [
[
{
"node": "Map Fields for Notion",
"type": "main",
"index": 0
}
]
]
},
"Generate Deep Summary": {
"main": [
[
{
"node": "Combine Page and Summary",
"type": "main",
"index": 1
}
]
]
},
"Filter Recent Papers": {
"main": [
[
{
"node": "Remove Duplicate Records",
"type": "main",
"index": 0
}
]
]
},
"Loop Back to Next Paper": {
"main": [
[
{
"node": "Iterate Paper Processing",
"type": "main",
"index": 0
}
]
]
},
"Map Fields for Notion": {
"main": [
[
{
"node": "Create Notion Record",
"type": "main",
"index": 0
}
]
]
},
"Parse Summary Chunks": {
"main": [
[
{
"node": "Prepare Chunk Debug",
"type": "main",
"index": 0
}
]
]
},
"Manual Execution Trigger": {
"main": [
[
{
"node": "External API Request",
"type": "main",
"index": 0
}
]
]
},
"Insert Summary Heading": {
"main": [
[
{
"node": "Iterate Paper Processing",
"type": "main",
"index": 0
}
]
]
},
"Iterate Paper Processing": {
"main": [
[],
[
{
"node": "Normalize Page Identifier",
"type": "main",
"index": 0
}
]
]
}
}
}