Crawling HD hero skins from the Honor of Kings official site with Node.js
Straight to the code. The crawler fetches the hero list page, follows each hero's detail page, reads the skin names from the data-imgname attribute, and streams every skin image to disk.
wCrawl.js
const cheerio = require("cheerio")
const fs = require("fs")
// iconv-lite decodes the GBK-encoded pages returned by the site
const iconv = require("iconv-lite")
const axios = require("axios")
const { rmAndMkdir } = require("./rwFs")

// Hero list page of the official site
const httpUrl = "https://pvp.qq.com/web201605/herolist.shtml"

/**
 * Crawl every hero's skin images into the given output directory.
 * @param {String} path output directory, e.g. "./img"
 */
function crawlPictures(path) {
    axios.get(httpUrl, {
        responseType: "arraybuffer"
    }).then(async res => {
        // The page is served as GBK, so decode it before handing it to cheerio
        const str = transCoding(res.data, "gbk")
        const $ = cheerio.load(str, { decodeEntities: false })
        await rmAndMkdir(path)
        console.log("Start crawling the hero list...")
        $(".herolist li a").each(async (index, element) => {
            const heroDetailUrl = "https://pvp.qq.com/web201605/" + $(element).attr("href")
            const heroName = $(element).find("img").attr("alt")
            // .each() does not await async callbacks, so stagger the detail
            // requests by the element index instead of using one fixed delay
            await sleep(index * 2000)
            parseImg({ heroDetailUrl, heroName, path })
        })
    })
}

/**
 * Parse a hero detail page: read the big-skin background URL and the skin
 * names, then download one image per skin.
 */
async function parseImg({ heroDetailUrl, heroName, path }) {
    const { data } = await axios.get(heroDetailUrl, {
        responseType: "arraybuffer"
    })
    const $ = cheerio.load(transCoding(data, "gbk"), { decodeEntities: false })
    // The big-skin image URL sits inside the inline style of .zk-con1
    const style = $(".wrapper .zk-con1").attr("style")
    const match = /background:url\('(.*?)'\) center 0/i.exec(style)
    if (!match) return
    const res = match[1]
    fs.mkdir(`${path}/${heroName}`, function (err) {
        if (err) return
        $(".pic-pf ul").each((index, element) => {
            // data-imgname carries the skin names: split on "&", drop the
            // trailing counter, and keep the text after "|" in each item
            const imgName = $(element).attr("data-imgname")
            if (!imgName) return
            const arr = imgName.split("&")
            arr.pop()
            const lastArr = arr.map(item => {
                const idx = item.indexOf("|")
                return idx !== -1 ? item.slice(idx + 1) : item
            })
            for (let i = 0; i < lastArr.length; i++) {
                imgDownload({
                    imgUrl: doHandleUrl("https:" + res, i + 1),
                    skinName: lastArr[i],
                    heroName,
                    path
                })
            }
        })
    })
}

// Replace the skin index that sits right before the file extension,
// e.g. "...-bigskin-1.jpg" becomes "...-bigskin-3.jpg" for number 3
function doHandleUrl(url, number) {
    const lastIndex = url.lastIndexOf(".")
    if (lastIndex === -1) return url
    return url.slice(0, lastIndex - 1) + number + url.slice(lastIndex)
}

// Stream one skin image to <path>/<heroName>/<skinName>.jpg
async function imgDownload({ imgUrl, skinName, heroName, path }) {
    console.log(skinName)
    const { data: imgRes } = await axios.get(imgUrl, {
        responseType: "stream"
    })
    const ws = fs.createWriteStream(`${path}/${heroName}/${skinName}.jpg`, {
        flags: "w"
    })
    imgRes.pipe(ws)
}

// Decode a Buffer from the given charset (the site serves GBK)
function transCoding(content, format) {
    return iconv.decode(Buffer.from(content), format)
}

// Simple delay helper
function sleep(ms = 2000) {
    return new Promise(resolve => setTimeout(resolve, ms))
}

module.exports = crawlPictures
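As a side note, the index-swapping trick used to build each skin URL can be checked in isolation. The snippet below is only a standalone sketch of that one step; the sample URL is illustrative of the bigskin-N.jpg pattern read from the page's inline style, not a value the crawler hardcodes.

// Standalone sketch of the URL rewrite performed by doHandleUrl:
// swap the skin index that sits right before the file extension.
function swapSkinIndex(url, number) {
    const lastIndex = url.lastIndexOf(".")
    if (lastIndex === -1) return url
    return url.slice(0, lastIndex - 1) + number + url.slice(lastIndex)
}

// Illustrative URL only; the real one comes from the .zk-con1 inline style.
const sample = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/107/107-bigskin-1.jpg"
console.log(swapSkinIndex(sample, 3))
// -> https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/107/107-bigskin-3.jpg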
Some filesystem helpers wrapped as Promises:
rwFs.js
const fs = require("fs")

/**
 * Append content to a file.
 * @param {String} path
 * @param {*} content
 * @returns {Promise}
 */
function fsWrite(path, content) {
    return new Promise((resolve, reject) => {
        fs.writeFile(path, content, {
            flag: "a",
            encoding: "utf-8"
        }, function (err) {
            if (!err) {
                console.log("Write succeeded")
                resolve()
            } else {
                console.log(err)
                reject(err)
            }
        })
    })
}

/**
 * Read a file as UTF-8 text.
 * @param {String} path
 * @returns {Promise<String>}
 */
function fsRead(path) {
    return new Promise((resolve, reject) => {
        fs.readFile(path, {
            flag: "r",
            encoding: "utf-8"
        }, function (err, data) {
            if (err) {
                reject(err)
            } else {
                resolve(data)
            }
        })
    })
}

/**
 * Check whether a file or directory exists.
 * @param {String} path
 * @returns {Promise<Boolean>}
 */
function isFileExisted(path) {
    return new Promise((resolve) => {
        fs.access(path, (err) => {
            if (err) {
                console.log("Path does not exist")
                resolve(false)
            } else {
                resolve(true)
            }
        })
    })
}

/**
 * Recursively remove the directory at the given path.
 * Note: recursive fs.rmdir is deprecated in newer Node versions; fs.rm is the modern replacement.
 * @param {String} path
 * @returns {Promise}
 */
function rmDir(path) {
    return new Promise((resolve, reject) => {
        fs.rmdir(path, { recursive: true }, (err) => {
            if (err) {
                reject(err)
            } else {
                console.log("Removing directory")
                resolve()
            }
        })
    })
}

/**
 * Create a directory at the given path.
 * @param {String} path
 * @returns {Promise}
 */
function mkDir(path) {
    return new Promise((resolve, reject) => {
        fs.mkdir(path, (err) => {
            if (err) {
                reject(err)
            } else {
                console.log("Creating directory")
                resolve()
            }
        })
    })
}

/**
 * Recreate the directory at the given path: remove it if it exists, then create it fresh.
 * @param {String} path
 */
async function rmAndMkdir(path) {
    try {
        const isExist = await isFileExisted(path)
        if (isExist) {
            await rmDir(path)
        }
        await mkDir(path)
    } catch (err) {
        console.log(err)
    }
}

module.exports = {
    fsWrite,
    fsRead,
    isFileExisted,
    rmDir,
    mkDir,
    rmAndMkdir
}
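For what it's worth, on Node 14.14+ the remove-and-recreate step can also be sketched with the built-in promise API instead of hand-rolled wrappers. This is a minimal sketch of that one helper, not a drop-in replacement for the whole module:

// Minimal sketch using fs/promises (Node 14.14+): fs.rm with force: true
// ignores a missing directory, so no existence check is needed.
const fsp = require("fs/promises")

async function rmAndMkdir(path) {
    await fsp.rm(path, { recursive: true, force: true })
    await fsp.mkdir(path, { recursive: true })
}

module.exports = { rmAndMkdir }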
index.js
const crawlPictures = require("./wCrawl")
crawlPictures("./img")
Run it:
node ./index.js
A pretty decent little demo.