n8n Workflow

Scrape And Store Data From Multiple Website Pages

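This n8n workflow scrapes SWIFT codes from theswiftcodes.com: it fetches the list of countries, walks every results page for each country, caches the downloaded HTML on disk, and inserts one record per bank branch into MongoDB. Scrape And Store Data From Multiple Website Pages - ready to import and run in your n8n instance.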

n8n workflow.json

About This Script

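This n8n workflow is started manually. It loads the country index from theswiftcodes.com, processes one country at a time, normalizes the country name with uProc, follows the "next" link until a country has no more pages, and stores the extracted name, city, branch, and SWIFT code of each row in a MongoDB collection. Downloaded pages are cached under /home/node/.cache/scrapper/ so that repeated runs do not request the same page again. Scrape And Store Data From Multiple Website Pages - ready to import and run in your n8n instance.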
Features:
  • Makes HTTP requests to external APIs
  • Custom JavaScript function processing
  • Conditional logic and branching
Source Code
{
    "nodes": [
        {
            "name": "On clicking 'execute'",
            "type": "n8n-nodes-base.manualTrigger",
            "position": [
                -140,
                820
            ],
            "parameters": {},
            "typeVersion": 1,
            "id": "b30db5cd-b346-44c5-845e-628557d289c0"
        },
        {
            "name": "HTTP Request",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                320,
                820
            ],
            "parameters": {
                "url": "https://www.theswiftcodes.com/browse-by-country/",
                "options": {},
                "responseFormat": "string"
            },
            "typeVersion": 1,
            "id": "b7c60fbb-ee3d-4df0-8bf6-db84b9d09e3d"
        },
        {
            "name": "HTML Extract",
            "type": "n8n-nodes-base.htmlExtract",
            "position": [
                510,
                820
            ],
            "parameters": {
                "options": {},
                "extractionValues": {
                    "values": [
                        {
                            "key": "countries",
                            "attribute": "href",
                            "cssSelector": "ol > li > a",
                            "returnArray": true,
                            "returnValue": "attribute"
                        }
                    ]
                }
            },
            "typeVersion": 1,
            "id": "44cf5410-a1c3-488e-86c5-7bf73a24c3fa"
        },
        {
            "name": "SplitInBatches",
            "type": "n8n-nodes-base.splitInBatches",
            "position": [
                910,
                820
            ],
            "parameters": {
                "options": {
                    "reset": false
                },
                "batchSize": 1
            },
            "typeVersion": 1,
            "id": "2f1a6296-94fa-46e5-b148-6dbbc6d22728"
        },
        {
            "name": "HTTP Request1",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                2250,
                740
            ],
            "parameters": {
                "url": "={{$node[\"Set\"].json[\"url\"]}}",
                "options": {},
                "responseFormat": "file"
            },
            "typeVersion": 1,
            "id": "566a5782-137d-48a0-9dfa-3c696a4c1213"
        },
        {
            "name": "HTML Extract1",
            "type": "n8n-nodes-base.htmlExtract",
            "position": [
                2750,
                590
            ],
            "parameters": {
                "options": {},
                "sourceData": "binary",
                "extractionValues": {
                    "values": [
                        {
                            "key": "next_button",
                            "attribute": "href",
                            "cssSelector": "span.next > a",
                            "returnValue": "attribute"
                        },
                        {
                            "key": "names",
                            "cssSelector": "td.table-name",
                            "returnArray": true
                        },
                        {
                            "key": "swifts",
                            "cssSelector": "td.table-swift",
                            "returnArray": true
                        },
                        {
                            "key": "cities",
                            "cssSelector": "td.table-city",
                            "returnArray": true
                        },
                        {
                            "key": "branches",
                            "cssSelector": "td.table-branch",
                            "returnArray": true
                        }
                    ]
                }
            },
            "typeVersion": 1,
            "id": "a9962741-6ebd-4cf3-ab63-7bcb98f2a0b4"
        },
        {
            "name": "MongoDB1",
            "type": "n8n-nodes-base.mongoDb",
            "position": [
                3280,
                590
            ],
            "parameters": {
                "fields": "iso_code,country,page,name,branch,city,swift_code,createdAt,updatedAt",
                "options": {
                    "dateFields": "createdAt,updatedAt"
                },
                "operation": "insert",
                "collection": "swifts.meetup"
            },
            "credentials": {
                "mongoDb": "db-mongo"
            },
            "typeVersion": 1,
            "id": "c43d2393-bb91-4722-ae0d-21ce4b9bfde7"
        },
        {
            "name": "uProc",
            "type": "n8n-nodes-base.uproc",
            "position": [
                1100,
                820
            ],
            "parameters": {
                "tool": "getCountryNormalized",
                "group": "geographic",
                "country": "={{$node[\"SplitInBatches\"].json[\"country\"].replace(/[\\/0-9]/g, \"\")}}",
                "additionalOptions": {}
            },
            "credentials": {
                "uprocApi": "uproc-miquel"
            },
            "typeVersion": 1,
            "id": "2c91f38e-5719-453f-8b88-76ca46943b52"
        },
        {
            "name": "Prepare Documents",
            "type": "n8n-nodes-base.function",
            "position": [
                2930,
                590
            ],
            "parameters": {
                "functionCode": "// Build one document per table row extracted from the current page.\nvar newItems = [];\n\nfor (var i = 0; i < items[0].json.swifts.length; i++) {\n  var item = {\n    iso_code: $node['uProc'].json.message.code,\n    country: $node['SplitInBatches'].json.country.replace(/[-\\/0-9]/g, \"\"),\n    page: $node['Set Page to Scrape'].json.page,\n    name: items[0].json.names[i],\n    city: items[0].json.cities[i],\n    branch: items[0].json.branches[i],\n    swift_code: items[0].json.swifts[i],\n    createdAt: new Date(),\n    updatedAt: new Date()\n  }\n  newItems.push({json: item});\n}\n\nreturn newItems;\n\n"
            },
            "typeVersion": 1,
            "id": "b4cc0ceb-3759-4d3b-9d35-6709b87e8eb8"
        },
        {
            "name": "More Countries",
            "type": "n8n-nodes-base.if",
            "position": [
                2810,
                1100
            ],
            "parameters": {
                "conditions": {
                    "string": [
                        {
                            "value1": "={{$node[\"SplitInBatches\"].context[\"noItemsLeft\"] + \"\"}}",
                            "value2": "true"
                        }
                    ]
                }
            },
            "typeVersion": 1,
            "id": "d3f51765-9ade-40a5-aa34-8f5d0e012d59"
        },
        {
            "name": "Set Page to Scrape",
            "type": "n8n-nodes-base.functionItem",
            "position": [
                1290,
                680
            ],
            "parameters": {
                "functionCode": "const staticData = getWorkflowStaticData('global');\n\nitem.page = \"\";\nif (staticData.page && staticData.page.length) {\n  item.page = staticData.page;\n} else {\n  item.page = $node['SplitInBatches'].json.country;\n}\nreturn item;\n"
            },
            "typeVersion": 1,
            "id": "1cf544d8-ae6d-4feb-8ac6-905ba05b1e16"
        },
        {
            "name": "More Pages",
            "type": "n8n-nodes-base.if",
            "position": [
                3070,
                1020
            ],
            "parameters": {
                "conditions": {
                    "string": [
                        {
                            "value1": "={{$json[\"more_pages\"] + \"\"}}",
                            "value2": "true"
                        }
                    ]
                }
            },
            "typeVersion": 1,
            "id": "48525220-7142-4589-9d67-80a5b5b2f9d1"
        },
        {
            "name": "Set More Pages",
            "type": "n8n-nodes-base.function",
            "position": [
                3470,
                590
            ],
            "parameters": {
                "functionCode": "var next_page = $node['HTML Extract1'].json.next_button && $node['HTML Extract1'].json.next_button.length ? $node['HTML Extract1'].json.next_button : \"\";\nvar more_pages = next_page.length > 0;\nconst staticData = getWorkflowStaticData('global');\n\n//all current items are after date: needs pagination\nif (more_pages) {\n  staticData.page = next_page;\n} else {\n  //don't check more items in previous pages;\n  delete staticData.page;\n}\n\nreturn [\n  {\n    json: {\n      more_pages: more_pages\n    }\n  }\n];\n"
            },
            "typeVersion": 1,
            "id": "dc53407b-cfa1-44c3-9e08-bce9ed884d37"
        },
        {
            "name": "Set",
            "type": "n8n-nodes-base.set",
            "position": [
                1440,
                680
            ],
            "parameters": {
                "values": {
                    "string": [
                        {
                            "name": "url",
                            "value": "=https://www.theswiftcodes.com{{$node[\"Set Page to Scrape\"].json[\"page\"]}}"
                        }
                    ]
                },
                "options": {}
            },
            "typeVersion": 1,
            "id": "304590cd-9260-4a3e-9d84-04976d639394"
        },
        {
            "name": "Generate filename",
            "type": "n8n-nodes-base.functionItem",
            "position": [
                1600,
                610
            ],
            "parameters": {
                "functionCode": "var generateNameFromUrl = function(url){\n    return url.replace(/[^a-z0-9]/gi, \"_\");\n}\n\nitem.file = generateNameFromUrl(item.url) + \".html\"\nreturn item;"
            },
            "typeVersion": 1,
            "id": "a50f6271-a0d7-4e89-ab0d-868280b3c5f9"
        },
        {
            "name": "Read Binary File",
            "type": "n8n-nodes-base.readBinaryFile",
            "position": [
                1770,
                610
            ],
            "parameters": {
                "filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
            },
            "typeVersion": 1,
            "continueOnFail": true,
            "alwaysOutputData": true,
            "id": "a91d0c03-37fe-4c6c-9977-908cc72845e3"
        },
        {
            "name": "File exists?",
            "type": "n8n-nodes-base.if",
            "position": [
                1950,
                610
            ],
            "parameters": {
                "conditions": {
                    "string": [
                        {
                            "value1": "={{$node[\"Read Binary File\"].binary.data.mimeType}}",
                            "value2": "text/html"
                        }
                    ]
                }
            },
            "typeVersion": 1,
            "id": "507c5dec-a023-4b1b-add0-aaa6d51f12df"
        },
        {
            "name": "Write Binary File",
            "type": "n8n-nodes-base.writeBinaryFile",
            "position": [
                2400,
                740
            ],
            "parameters": {
                "fileName": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}",
                "dataPropertyName": "=data"
            },
            "typeVersion": 1,
            "id": "d88fbab7-2d01-4cdd-8f82-743f8bb89d41"
        },
        {
            "name": "Read Binary File1",
            "type": "n8n-nodes-base.readBinaryFile",
            "position": [
                2570,
                590
            ],
            "parameters": {
                "filePath": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}"
            },
            "typeVersion": 1,
            "continueOnFail": true,
            "alwaysOutputData": true,
            "id": "22e87dc2-19b2-4519-b229-1bfcc96064b8"
        },
        {
            "name": "Wait",
            "type": "n8n-nodes-base.function",
            "position": [
                2090,
                740
            ],
            "parameters": {
                "functionCode": "const waitTimeSeconds = 1;\n\nreturn new Promise((resolve) => {\n  setTimeout(() => {\n    resolve([]);\n  }, waitTimeSeconds * 1000);\n});\n"
            },
            "typeVersion": 1,
            "continueOnFail": true,
            "alwaysOutputData": true,
            "id": "ede3e147-e206-4cb6-8374-1661ebcf5db0"
        },
        {
            "name": "Prepare countries",
            "type": "n8n-nodes-base.function",
            "position": [
                700,
                820
            ],
            "parameters": {
                "functionCode": "return items[0].json.countries.map(function(country) {\n  return {\n  json: {country: country}\n  }\n});"
            },
            "typeVersion": 1,
            "id": "8cfe9060-4e1a-4810-8a1f-8bba5f474bd9"
        },
        {
            "name": "Create Directory",
            "type": "n8n-nodes-base.executeCommand",
            "position": [
                70,
                820
            ],
            "parameters": {
                "command": "mkdir -p  /home/node/.cache/scrapper/"
            },
            "typeVersion": 1,
            "continueOnFail": true,
            "id": "777f8b6d-f4a5-4f87-b2c9-43350843f1a7"
        },
        {
            "name": "MongoDB",
            "type": "n8n-nodes-base.mongoDb",
            "disabled": true,
            "position": [
                3100,
                520
            ],
            "parameters": {
                "query": "={\"swift_code\": \"{{$json[\"swift_code\"]}}\"}",
                "options": {},
                "collection": "swifts.meetup"
            },
            "credentials": {
                "mongoDb": "db-mongo"
            },
            "executeOnce": false,
            "typeVersion": 1,
            "alwaysOutputData": true,
            "id": "c282f216-9884-45b0-ac69-58c80d130b34"
        }
    ],
    "connections": {
        "Set": {
            "main": [
                [
                    {
                        "node": "Generate filename",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Wait": {
            "main": [
                [
                    {
                        "node": "HTTP Request1",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "uProc": {
            "main": [
                [
                    {
                        "node": "Set Page to Scrape",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "MongoDB": {
            "main": [
                []
            ]
        },
        "MongoDB1": {
            "main": [
                [
                    {
                        "node": "Set More Pages",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "More Pages": {
            "main": [
                [
                    {
                        "node": "Set Page to Scrape",
                        "type": "main",
                        "index": 0
                    }
                ],
                [
                    {
                        "node": "More Countries",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "File exists?": {
            "main": [
                [
                    {
                        "node": "Read Binary File1",
                        "type": "main",
                        "index": 0
                    }
                ],
                [
                    {
                        "node": "Wait",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "HTML Extract": {
            "main": [
                [
                    {
                        "node": "Prepare countries",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "HTTP Request": {
            "main": [
                [
                    {
                        "node": "HTML Extract",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "HTML Extract1": {
            "main": [
                [
                    {
                        "node": "Prepare Documents",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "HTTP Request1": {
            "main": [
                [
                    {
                        "node": "Write Binary File",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "More Countries": {
            "main": [
                [],
                [
                    {
                        "node": "SplitInBatches",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Set More Pages": {
            "main": [
                [
                    {
                        "node": "More Pages",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "SplitInBatches": {
            "main": [
                [
                    {
                        "node": "uProc",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Create Directory": {
            "main": [
                [
                    {
                        "node": "HTTP Request",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Read Binary File": {
            "main": [
                [
                    {
                        "node": "File exists?",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Generate filename": {
            "main": [
                [
                    {
                        "node": "Read Binary File",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Prepare Documents": {
            "main": [
                [
                    {
                        "node": "MongoDB1",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Prepare countries": {
            "main": [
                [
                    {
                        "node": "SplitInBatches",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Read Binary File1": {
            "main": [
                [
                    {
                        "node": "HTML Extract1",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Write Binary File": {
            "main": [
                [
                    {
                        "node": "Read Binary File1",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Set Page to Scrape": {
            "main": [
                [
                    {
                        "node": "Set",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "On clicking 'execute'": {
            "main": [
                [
                    {
                        "node": "Create Directory",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        }
    },
    "n8n_version": "1.5.0"
}
Requirements
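An n8n instance, MongoDB credentials (referenced as "db-mongo"), uProc API credentials (referenced as "uproc-miquel"), and write access to /home/node/.cache/scrapper/ for the page cache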
Tags
#n8n #automation #workflow
Quick Actions
More in n8n Workflows