n8n
Workflow
Scrape And Store Data From Multiple Website Pages
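This n8n workflow scrapes SWIFT code listings from theswiftcodes.com. It collects the per-country links, follows the "next" link through every results page of each country (caching each downloaded HTML page on disk), normalizes the country with uProc, and inserts the extracted bank name, branch, city and SWIFT code into MongoDB. Scrape And Store Data From Multiple Website Pages - ready to import into your n8n instance and run once the MongoDB and uProc credentials are configured.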
n8n
workflow.json
About This Script
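This n8n workflow scrapes SWIFT code listings from theswiftcodes.com. It collects the per-country links, follows the "next" link through every results page of each country (caching each downloaded HTML page on disk), normalizes the country with uProc, and inserts the extracted bank name, branch, city and SWIFT code into MongoDB. Scrape And Store Data From Multiple Website Pages - ready to import into your n8n instance and run once the MongoDB and uProc credentials are configured.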
Features:
- Makes HTTP requests to external APIs
- Custom JavaScript function processing
- Conditional logic and branching
Source Code
{
"nodes": [
{
"name": "On clicking 'execute'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-140,
820
],
"parameters": [],
"typeVersion": 1,
"id": "b30db5cd-b346-44c5-845e-628557d289c0"
},
{
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
320,
820
],
"parameters": {
"url": "https://www.theswiftcodes.com/browse-by-country/",
"options": [],
"responseFormat": "string"
},
"typeVersion": 1,
"id": "b7c60fbb-ee3d-4df0-8bf6-db84b9d09e3d"
},
{
"name": "HTML Extract",
"type": "n8n-nodes-base.htmlExtract",
"position": [
510,
820
],
"parameters": {
"options": [],
"extractionValues": {
"values": [
{
"key": "countries",
"attribute": "href",
"cssSelector": "ol > li > a",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1,
"id": "44cf5410-a1c3-488e-86c5-7bf73a24c3fa"
},
{
"name": "SplitInBatches",
"type": "n8n-nodes-base.splitInBatches",
"position": [
910,
820
],
"parameters": {
"options": {
"reset": false
},
"batchSize": 1
},
"typeVersion": 1,
"id": "2f1a6296-94fa-46e5-b148-6dbbc6d22728"
},
{
"name": "HTTP Request1",
"type": "n8n-nodes-base.httpRequest",
"position": [
2250,
740
],
"parameters": {
"url": "={{$node[\"Set\"].json[\"url\"]}}",
"options": [],
"responseFormat": "file"
},
"typeVersion": 1,
"id": "566a5782-137d-48a0-9dfa-3c696a4c1213"
},
{
"name": "HTML Extract1",
"type": "n8n-nodes-base.htmlExtract",
"position": [
2750,
590
],
"parameters": {
"options": [],
"sourceData": "binary",
"extractionValues": {
"values": [
{
"key": "next_button",
"attribute": "href",
"cssSelector": "span.next > a",
"returnValue": "attribute"
},
{
"key": "names",
"cssSelector": "td.table-name",
"returnArray": true
},
{
"key": "swifts",
"cssSelector": "td.table-swift",
"returnArray": true
},
{
"key": "cities",
"cssSelector": "td.table-city",
"returnArray": true
},
{
"key": "branches",
"cssSelector": "td.table-branch",
"returnArray": true
}
]
}
},
"typeVersion": 1,
"id": "a9962741-6ebd-4cf3-ab63-7bcb98f2a0b4"
},
{
"name": "MongoDB1",
"type": "n8n-nodes-base.mongoDb",
"position": [
3280,
590
],
"parameters": {
"fields": "iso_code,country,page,name,branch,city,swift_code,createdAt,updatedAt",
"options": {
"dateFields": "createdAt,updatedAt"
},
"operation": "insert",
"collection": "swifts.meetup"
},
"credentials": {
"mongoDb": "db-mongo"
},
"typeVersion": 1,
"id": "c43d2393-bb91-4722-ae0d-21ce4b9bfde7"
},
{
"name": "uProc",
"type": "n8n-nodes-base.uproc",
"position": [
1100,
820
],
"parameters": {
"tool": "getCountryNormalized",
"group": "geographic",
"country": "={{$node[\"SplitInBatches\"].json[\"country\"].replace(/[\\/0-9]/g, \"\")}}",
"additionalOptions": []
},
"credentials": {
"uprocApi": "uproc-miquel"
},
"typeVersion": 1,
"id": "2c91f38e-5719-453f-8b88-76ca46943b52"
},
{
"name": "Prepare Documents",
"type": "n8n-nodes-base.function",
"position": [
2930,
590
],
"parameters": {
"functionCode": "// Build one MongoDB document per table row extracted by 'HTML Extract1'.\n// items[0].json carries parallel arrays (names, swifts, cities, branches);\n// index i of each array is treated as one table row.\nconst scraped = items[0].json;\n\n// Identical for every row of the page, so look them up once.\nconst isoCode = $node['uProc'].json.message.code;\nconst country = $node['SplitInBatches'].json.country.replace(/[-\\/0-9]/g, \"\");\nconst page = $node['Set Page to Scrape'].json.page;\n\nconst newItems = [];\n\n// Declare the index locally so it does not become an implicit global.\nfor (let i = 0; i < scraped.swifts.length; i++) {\n  newItems.push({\n    json: {\n      iso_code: isoCode,\n      country: country,\n      page: page,\n      name: scraped.names[i],\n      city: scraped.cities[i],\n      branch: scraped.branches[i],\n      swift_code: scraped.swifts[i],\n      createdAt: new Date(),\n      updatedAt: new Date()\n    }\n  });\n}\n\nreturn newItems;\n"
},
"typeVersion": 1,
"id": "b4cc0ceb-3759-4d3b-9d35-6709b87e8eb8"
},
{
"name": "More Countries",
"type": "n8n-nodes-base.if",
"position": [
2810,
1100
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$node[\"SplitInBatches\"].context[\"noItemsLeft\"] + \"\"}}",
"value2": "true"
}
]
}
},
"typeVersion": 1,
"id": "d3f51765-9ade-40a5-aa34-8f5d0e012d59"
},
{
"name": "Set Page to Scrape",
"type": "n8n-nodes-base.functionItem",
"position": [
1290,
680
],
"parameters": {
"functionCode": "const staticData = getWorkflowStaticData('global');\n\nitem.page = \"\";\nif (staticData.page && staticData.page.length) {\n item.page = staticData.page;\n} else {\n item.page = $node['SplitInBatches'].json.country;\n}\nreturn item;\n"
},
"typeVersion": 1,
"id": "1cf544d8-ae6d-4feb-8ac6-905ba05b1e16"
},
{
"name": "More Pages",
"type": "n8n-nodes-base.if",
"position": [
3070,
1020
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$json[\"more_pages\"] + \"\"}}",
"value2": "true"
}
]
}
},
"typeVersion": 1,
"id": "48525220-7142-4589-9d67-80a5b5b2f9d1"
},
{
"name": "Set More Pages",
"type": "n8n-nodes-base.function",
"position": [
3470,
590
],
"parameters": {
"functionCode": "var next_page = $node['HTML Extract1'].json.next_button && $node['HTML Extract1'].json.next_button.length ? $node['HTML Extract1'].json.next_button : \"\";\nvar more_pages = next_page.length > 0;\nconst staticData = getWorkflowStaticData('global');\n\n//all current items are after date: needs pagination\nif (more_pages) {\n staticData.page = next_page;\n} else {\n //don't check more items in previous pages;\n delete staticData.page;\n}\n\nreturn [\n {\n json: {\n more_pages: more_pages\n }\n }\n];\n"
},
"typeVersion": 1,
"id": "dc53407b-cfa1-44c3-9e08-bce9ed884d37"
},
{
"name": "Set",
"type": "n8n-nodes-base.set",
"position": [
1440,
680
],
"parameters": {
"values": {
"string": [
{
"name": "url",
"value": "=https://www.theswiftcodes.com{{$node[\"Set Page to Scrape\"].json[\"page\"]}}"
}
]
},
"options": []
},
"typeVersion": 1,
"id": "304590cd-9260-4a3e-9d84-04976d639394"
},
{
"name": "Generate filename",
"type": "n8n-nodes-base.functionItem",
"position": [
1600,
610
],
"parameters": {
"functionCode": "var generateNameFromUrl = function(url){\n return url.replace(/[^a-z0-9]/gi, \"_\");\n}\n\nitem.file = generateNameFromUrl(item.url) + \".html\"\nreturn item;"
},
"typeVersion": 1,
"id": "a50f6271-a0d7-4e89-ab0d-868280b3c5f9"
},
{
"name": "Read Binary File",
"type": "n8n-nodes-base.readBinaryFile",
"position": [
1770,
610
],
"parameters": {
"filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true,
"id": "a91d0c03-37fe-4c6c-9977-908cc72845e3"
},
{
"name": "File exists?",
"type": "n8n-nodes-base.if",
"position": [
1950,
610
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$node[\"Read Binary File\"].binary.data.mimeType}}",
"value2": "text/html"
}
]
}
},
"typeVersion": 1,
"id": "507c5dec-a023-4b1b-add0-aaa6d51f12df"
},
{
"name": "Write Binary File",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
2400,
740
],
"parameters": {
"fileName": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}",
"dataPropertyName": "=data"
},
"typeVersion": 1,
"id": "d88fbab7-2d01-4cdd-8f82-743f8bb89d41"
},
{
"name": "Read Binary File1",
"type": "n8n-nodes-base.readBinaryFile",
"position": [
2570,
590
],
"parameters": {
"filePath": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true,
"id": "22e87dc2-19b2-4519-b229-1bfcc96064b8"
},
{
"name": "Wait",
"type": "n8n-nodes-base.function",
"position": [
2090,
740
],
"parameters": {
"functionCode": "const waitTimeSeconds = 1;\n\nreturn new Promise((resolve) => {\n setTimeout(() => {\n resolve([]);\n }, waitTimeSeconds * 1000);\n});\n"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true,
"id": "ede3e147-e206-4cb6-8374-1661ebcf5db0"
},
{
"name": "Prepare countries",
"type": "n8n-nodes-base.function",
"position": [
700,
820
],
"parameters": {
"functionCode": "return items[0].json.countries.map(function(country) {\n return {\n json: {country: country}\n }\n});"
},
"typeVersion": 1,
"id": "8cfe9060-4e1a-4810-8a1f-8bba5f474bd9"
},
{
"name": "Create Directory",
"type": "n8n-nodes-base.executeCommand",
"position": [
70,
820
],
"parameters": {
"command": "mkdir -p /home/node/.cache/scrapper/"
},
"typeVersion": 1,
"continueOnFail": true,
"id": "777f8b6d-f4a5-4f87-b2c9-43350843f1a7"
},
{
"name": "MongoDB",
"type": "n8n-nodes-base.mongoDb",
"disabled": true,
"position": [
3100,
520
],
"parameters": {
"query": "={\"swift_code\": \"{{$json[\"swift_code\"]}}\"}",
"options": [],
"collection": "swifts.meetup"
},
"credentials": {
"mongoDb": "db-mongo"
},
"executeOnce": false,
"typeVersion": 1,
"alwaysOutputData": true,
"id": "c282f216-9884-45b0-ac69-58c80d130b34"
}
],
"connections": {
"Set": {
"main": [
[
{
"node": "Generate filename",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"main": [
[
{
"node": "HTTP Request1",
"type": "main",
"index": 0
}
]
]
},
"uProc": {
"main": [
[
{
"node": "Set Page to Scrape",
"type": "main",
"index": 0
}
]
]
},
"MongoDB": {
"main": [
[]
]
},
"MongoDB1": {
"main": [
[
{
"node": "Set More Pages",
"type": "main",
"index": 0
}
]
]
},
"More Pages": {
"main": [
[
{
"node": "Set Page to Scrape",
"type": "main",
"index": 0
}
],
[
{
"node": "More Countries",
"type": "main",
"index": 0
}
]
]
},
"File exists?": {
"main": [
[
{
"node": "Read Binary File1",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract": {
"main": [
[
{
"node": "Prepare countries",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "HTML Extract",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract1": {
"main": [
[
{
"node": "Prepare Documents",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request1": {
"main": [
[
{
"node": "Write Binary File",
"type": "main",
"index": 0
}
]
]
},
"More Countries": {
"main": [
[],
[
{
"node": "SplitInBatches",
"type": "main",
"index": 0
}
]
]
},
"Set More Pages": {
"main": [
[
{
"node": "More Pages",
"type": "main",
"index": 0
}
]
]
},
"SplitInBatches": {
"main": [
[
{
"node": "uProc",
"type": "main",
"index": 0
}
]
]
},
"Create Directory": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File": {
"main": [
[
{
"node": "File exists?",
"type": "main",
"index": 0
}
]
]
},
"Generate filename": {
"main": [
[
{
"node": "Read Binary File",
"type": "main",
"index": 0
}
]
]
},
"Prepare Documents": {
"main": [
[
{
"node": "MongoDB1",
"type": "main",
"index": 0
}
]
]
},
"Prepare countries": {
"main": [
[
{
"node": "SplitInBatches",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File1": {
"main": [
[
{
"node": "HTML Extract1",
"type": "main",
"index": 0
}
]
]
},
"Write Binary File": {
"main": [
[
{
"node": "Read Binary File1",
"type": "main",
"index": 0
}
]
]
},
"Set Page to Scrape": {
"main": [
[
{
"node": "Set",
"type": "main",
"index": 0
}
]
]
},
"On clicking 'execute'": {
"main": [
[
{
"node": "Create Directory",
"type": "main",
"index": 0
}
]
]
}
},
"n8n_version": "1.5.0"
}
Requirements
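An n8n instance, MongoDB credentials, uProc API credentials, and permission for n8n to run shell commands and read/write files under /home/node/.cache/scrapper/ (used as the HTML page cache)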
Tags
#n8n
#automation
#workflow