kw-crawler
TypeScript icon, indicating that this package has built-in type declarations

1.2.8 • Public • Published

Crawler

Build Status

Crawl HTML pages or JSON APIs.

Install

npm install kw-crawler

How to use

HTMLCrawler

const assert = require("assert");
const { HTMLCrawler } = require("kw-crawler");

// Example: fetch a page and extract one value from it with a CSS selector.
(async () => {
  const url = "https://github.com/shana0440/crawler";
  const crawler = new HTMLCrawler(url);

  // The rule's name ("title") is the key the extracted value is read back
  // under in the results object below.
  crawler.setRule({
    name: "title",
    selector: "#readme > article > h1:nth-child(1)",
    // The callback receives the selection for `selector`; return its text.
    callback: selector => selector.text()
  });

  const results = await crawler.getResults();

  // Compare the extracted value against the expected heading. A bare
  // assert(value, "Crawler") would only check truthiness and treat
  // "Crawler" as the failure message.
  assert.strictEqual(results["title"], "Crawler");
})();

JSONCrawler

const { JSONCrawler } = require("kw-crawler");

// Example: query a JSON endpoint and reshape part of the response.
(async () => {
  const crawler = new JSONCrawler(
    "https://api.github.com/search/repositories?q=shana0440/crawler"
  );

  // Register a User-Agent header on the crawler before fetching.
  crawler.setHeader({ name: "User-Agent", value: "request" });

  // Keep only the fields we care about from a single entry.
  const pickFields = entry => {
    return {
      name: entry.name,
      full_name: entry.full_name,
      html_url: entry.html_url
    };
  };

  crawler.setRule({
    name: "items",
    selector: "items",
    callback: entries => entries.map(entry => pickFields(entry))
  });

  const output = await crawler.getResults();
  console.log(output);
  /**
   * Expected shape of the logged value:
   *
   * {
   *   "items": [{
   *     "name": "...",
   *     "full_name": "...",
   *     "html_url": "..."
   *   }]
   * }
   */
})();

Readme

Keywords

Package Sidebar

Install

npm i kw-crawler

Weekly Downloads

1

Version

1.2.8

License

MIT

Unpacked Size

48.8 kB

Total Files

16

Last publish

Collaborators

  • kwguo