node.js实现爬取王者农药官网皮肤(高清)

2021 年 11 月 5 日 星期五(已编辑)
/ ,
17
这篇文章上次修改于 2024 年 7 月 20 日 星期六,可能部分内容已经不适用,如有疑问可询问作者。

node.js实现爬取王者农药官网皮肤(高清)

直接上代码

wCrawl.js


const cheerio = require("cheerio")
const fs = require("fs")
//获取html文档内容  
const iconv = require("iconv-lite")
const axios = require("axios")
const {
  rmAndMkdir
} = require("./rwFs")
const httpUrl = "https://pvp.qq.com/web201605/herolist.shtml"
/**
 * Crawl the hero list page and hand every hero's detail page to parseImg.
 *
 * Heroes are processed one after another with a sleep() pause before each
 * detail request, so the pause actually throttles the crawl. A failure on one
 * hero is logged and does not stop the remaining heroes.
 *
 * @param {String} path Output path. NOTE(review): currently unused — this
 *   function recreates "./img"; confirm where the downstream helpers write
 *   before wiring this parameter through.
 * @returns {Promise<void>} Settles once every hero link has been handed to
 *   parseImg; rejects if the hero list page itself cannot be fetched.
 */
async function crawlPictures(path) {
  const res = await axios.get(httpUrl, {
    responseType: 'arraybuffer'
  })
  // The response is decoded as GBK before parsing.
  const str = transCoding(res.data, 'gbk')
  const $ = cheerio.load(str, {
    decodeEntities: false
  })
  // Wait for the output directory to be recreated before anything is processed.
  await rmAndMkdir("./img")
  console.log("开始爬取网页内容...")
  const heroLinks = $(".herolist li a").toArray()
  for (const element of heroLinks) {
    const heroDetailUrl = "https://pvp.qq.com/web201605/" +
      $(element).attr("href")
    // Query the already-parsed anchor instead of loading it into a new document.
    const heroName = $(element).find("img").attr('alt')
    await sleep()
    try {
      await parseImg({
        heroDetailUrl,
        heroName
      })
    } catch (err) {
      // Keep crawling the remaining heroes when a single one fails.
      console.log(err)
    }
  }
}
/**
 * Build the URL of the n-th skin image from the base image URL by replacing
 * the single character right before the file extension with `number`.
 *
 * @param {String} url Base image URL.
 * @param {Number} number 1-based skin index.
 * @returns {String|undefined} The derived URL, or undefined when `url`
 *   contains no ".".
 */
function buildSkinUrl(url, number) {
  const lastIndex = url.lastIndexOf('.')
  if (lastIndex === -1) {
    return undefined
  }
  return url.substring(0, lastIndex - 1) + number + url.substring(lastIndex)
}

/**
 * Extract the skin names from a `data-imgname` attribute value.
 *
 * The value is split on "&", the last piece is dropped, and for every
 * remaining piece only the text after its first "|" (if any) is kept.
 * Presumably the attribute looks like "nameA&0|nameB&1" — TODO confirm
 * against the live page.
 *
 * @param {String|undefined} imgName Raw attribute value.
 * @returns {String[]} Skin names in page order; empty when the attribute is missing.
 */
function parseSkinNames(imgName) {
  if (typeof imgName !== "string") {
    return []
  }
  const parts = imgName.split("&")
  parts.pop()
  return parts.map(item => {
    const separator = item.indexOf("|")
    return separator === -1 ? item : item.slice(separator + 1)
  })
}

/**
 * Fetch one hero's detail page, create ./img/<heroName> and start a download
 * for every skin listed on the page.
 *
 * Problems that affect only this hero (no background URL on the page, the
 * directory cannot be created, a single download fails) are logged and do
 * not throw. A failed page request still rejects.
 *
 * @param {Object} options
 * @param {String} options.heroDetailUrl URL of the hero detail page.
 * @param {String} options.heroName Hero name, used as the directory name.
 * @returns {Promise<void>} Settles after every started download has settled.
 */
async function parseImg({
  heroDetailUrl,
  heroName
}) {
  const {
    data
  } = await axios.get(heroDetailUrl, {
    responseType: 'arraybuffer'
  })
  const $ = cheerio.load(transCoding(data, 'gbk'), {
    decodeEntities: false
  })
  const style = $(".wrapper .zk-con1").attr("style")

  // Guard against a missing attribute or an unexpected style format.
  const matched = /background:url\('(.*?)'\) center 0/is.exec(style || "")
  if (!matched) {
    console.log(`no skin background url found for ${heroName}: ${heroDetailUrl}`)
    return
  }
  const baseUrl = "https:" + matched[1]

  try {
    await fs.promises.mkdir(`./img/${heroName}`)
  } catch (err) {
    // As before, nothing is downloaded when the directory cannot be created,
    // but the reason is now reported.
    console.log(err)
    return
  }

  const downloads = []
  $(".pic-pf ul").each((index, element) => {
    const skinNames = parseSkinNames($(element).attr("data-imgname"))
    skinNames.forEach((skinName, i) => {
      const imgUrl = buildSkinUrl(baseUrl, i + 1)
      if (!imgUrl) {
        return
      }
      downloads.push(
        imgDownload({
          imgUrl,
          skinName,
          heroName
        }).catch(err => {
          // One failed skin must not abort the others.
          console.log(`failed to download ${heroName}/${skinName}`, err)
        })
      )
    })
  })
  await Promise.all(downloads)
}

/**
 * Download one skin image and save it as ./img/<heroName>/<skinName>.jpg.
 *
 * The returned promise settles only after the file has been completely
 * written, and rejects when either the response stream or the file stream
 * fails, so such errors no longer surface as unhandled 'error' events.
 *
 * @param {Object} options
 * @param {String} options.imgUrl URL of the image to download.
 * @param {String} options.skinName Skin name, used as the file name.
 * @param {String} options.heroName Hero name, used as the directory name.
 * @returns {Promise<void>}
 */
async function imgDownload({
  imgUrl,
  skinName,
  heroName
}) {
  console.log(skinName)
  const {
    data: imgRes
  } = await axios.get(imgUrl, {
    responseType: 'stream'
  })
  const ws = fs.createWriteStream(`./img/${heroName}/${skinName}.jpg`, {
    flags: "w",
  })
  await new Promise((resolve, reject) => {
    imgRes.once("error", (err) => {
      // Release the file handle when the download breaks midway.
      ws.destroy()
      reject(err)
    })
    ws.once("error", (err) => {
      // Stop reading the response when the file cannot be written.
      imgRes.destroy()
      reject(err)
    })
    ws.once("finish", resolve)
    imgRes.pipe(ws)
  })
}

/**
 * Decode raw response content into a string using the given character encoding.
 *
 * @param {*} content Raw content; it is copied into a Buffer before decoding.
 * @param {String} format Encoding name passed to iconv-lite (e.g. 'gbk').
 * @returns {String} The decoded text.
 */
function transCoding(content, format) {
  const rawBytes = Buffer.from(content)
  const decodedText = iconv.decode(rawBytes, format)
  return decodedText
}

/**
 * Return a promise that resolves after a delay.
 *
 * @param {Number} [ms=2000] Delay in milliseconds; defaults to the previously
 *   hard-coded 2000 ms so existing `sleep()` calls behave the same.
 * @returns {Promise<void>}
 */
function sleep(ms = 2000) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
}
module.exports = crawlPictures

封装一些工具函数

rwFs.js

const fs = require("fs")
/**
 * Append content to a file (the file is opened with flag "a").
 *
 * @param {String} path File to write to.
 * @param {*} content Data to append, written as UTF-8.
 * @returns {Promise<void>} Resolves after the write succeeds; rejects with
 *   the underlying fs error so callers can see why the write failed.
 */
function fsWrite(path, content) {
  return new Promise((resolve, reject) => {
    fs.writeFile(path, content, {
      flag: "a",
      encoding: "utf-8"
    }, function (err) {
      if (!err) {
        console.log("写入成功");
        resolve()
      } else {
        console.log(err);
        // Pass the error on; a bare reject() left callers with `undefined`.
        reject(err)
      }
    })
  })
}
/**
 * Read a whole file as UTF-8 text.
 *
 * @param {String} url Path of the file to read.
 * @returns {Promise<String>} Resolves with the file contents; rejects with
 *   the fs error when the read fails.
 */
function fsRead(url) {
  const readOptions = {
    flag: 'r',
    encoding: "utf-8"
  }
  return new Promise((resolve, reject) => {
    fs.readFile(url, readOptions, (readError, contents) => {
      if (readError) {
        reject(readError)
        return
      }
      resolve(contents)
    })
  })
}
/**
 * Check whether a path is accessible.
 *
 * @param {String} path Path to check.
 * @returns {Promise<Boolean>} Resolves with true when fs.access succeeds and
 *   with false (after logging a message) when it reports any error; never rejects.
 */
function isFileExisted(path) {
  return new Promise((resolve) => {
    fs.access(path, (accessError) => {
      const exists = !accessError
      if (!exists) {
        console.log("文件不存在")
      }
      resolve(exists)
    })
  })
}
/**
 * Recursively delete the directory at the given path.
 *
 * Uses fs.rm when the runtime provides it, because the `recursive` option of
 * fs.rmdir is deprecated; older runtimes fall back to fs.rmdir.
 *
 * @param {String} path Directory to delete.
 * @returns {Promise<void>} Resolves after the deletion finishes; rejects with
 *   the fs error otherwise.
 */
function rmDir(path) {
  return new Promise((resolve, reject) => {
    const options = {
      recursive: true
    }
    const onDone = (err) => {
      if (err) {
        reject(err)
      } else {
        console.log("正在删除");
        resolve()
      }
    }
    if (typeof fs.rm === "function") {
      fs.rm(path, options, onDone)
    } else {
      fs.rmdir(path, options, onDone)
    }
  })
}
/**
 * Create a single directory at the given path (non-recursive).
 *
 * @param {String} path Directory to create.
 * @returns {Promise<void>} Resolves once the directory has been created;
 *   rejects with the fs error otherwise.
 */
function mkDir(path) {
  const attempt = (resolve, reject) => {
    const onDone = (mkdirError) => {
      if (!mkdirError) {
        console.log("正在创建目录")
        resolve()
        return
      }
      reject(mkdirError)
    }
    fs.mkdir(path, onDone)
  }
  return new Promise(attempt)
}
/**
 * Recreate the directory at the given path: delete it first when it already
 * exists, then create it again. Errors are logged rather than thrown.
 *
 * @param {String} path Directory to recreate.
 * @returns {Promise<void>}
 */
async function rmAndMkdir(path) {
  try {
    const alreadyThere = await isFileExisted(path)
    if (alreadyThere) {
      await rmDir(path)
    }
    // The directory is created in both cases.
    await mkDir(path)
  } catch (err) {
    console.log(err)
  }
}

module.exports = {
  fsWrite,
  fsRead,
  isFileExisted,
  rmDir,
  mkDir,
  rmAndMkdir
}

index.js

const crawlPictures = require("./wCrawl")
crawlPictures("./img")

在终端执行 node ./index.js 即可开始爬取。

还不错的小demo

使用社交账号登录

  • Loading...
  • Loading...
  • Loading...
  • Loading...
  • Loading...