nodejs爬虫批量翻译获取单词词义、音标

2018-11-5 Linyuting.cn

    前段时间帮朋友写的批量翻译单词表格的代码,贴出来分享一下。效率虽然不是特别高,但基本稳定:可以从 Excel 读取单词,翻译后写回 Excel 的另一列,然后自动保存。

1111.png

 

var http = require("http");
var fs = require("fs");
var xlsx = require('node-xlsx');
var progress = require('progress');

/**
 * Fetch `url` with an HTTP GET and hand the complete response body to
 * `callback` as text.
 *
 * The HTTP status code is not inspected: whatever body the server sends is
 * passed on unchanged.
 *
 * @param {string} url - Address to request.
 * @param {function(?string)} callback - Invoked exactly once, with the body
 *     decoded as UTF-8, or with `null` when the request or the response
 *     stream reports an error.
 */
function download(url, callback) {
    var finished = false;

    // The request and the response can each emit "error" (possibly after
    // "end"), so funnel every outcome through one guard to keep the
    // "called exactly once" promise.
    function finish(result) {
        if (finished) {
            return;
        }
        finished = true;
        callback(result);
    }

    http.get(url, function (res) {
        var data = "";
        // Let the stream do the decoding. Appending raw Buffer chunks to a
        // string decodes each chunk on its own, which corrupts any
        // multi-byte character that straddles a chunk boundary.
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            data += chunk;
        });
        res.on("end", function () {
            finish(data);
        });
        res.on("error", function () {
            finish(null);
        });
    }).on("error", function () {
        finish(null);
    });
}

/**
 * Look up the phonetic transcription of `word` on dict.cn.
 *
 * The transcription is taken to be the text between the first
 * `<bdo lang="EN-US">` tag and the first `</bdo>` that follows it.
 * If the page cannot be downloaded at all, the lookup is handed over to
 * `get_pho2` with the same word and callback.
 *
 * @param {string} word - Word to look up; it is URL-encoded before use.
 * @param {function(string|number)} cb - Receives the trimmed transcription
 *     on success, otherwise a numeric code:
 *     1 = opening tag not found, 2 = no closing tag after it,
 *     3 = the text between the tags is empty.
 */
var get_pho = function (word, cb) {
    var OPEN_TAG = '<bdo lang="EN-US">';
    var CLOSE_TAG = '</bdo>';
    // Encode the word so that spaces or non-ASCII characters still form a
    // valid request path.
    var url = "http://dict.cn/" + encodeURIComponent(word);

    download(url, function (data) {
        if (!data) {
            get_pho2(word, cb);
            return;
        }

        // Test the raw indexOf result: once the tag length has been added
        // the value can no longer be negative.
        var openAt = data.indexOf(OPEN_TAG);
        if (openAt < 0) {
            cb(1);
            return;
        }
        var start = openAt + OPEN_TAG.length;

        // Search only after the opening tag so an earlier, unrelated </bdo>
        // on the page is not mistaken for the end of the transcription.
        var end = data.indexOf(CLOSE_TAG, start);
        if (end < 0) {
            cb(2);
            return;
        }

        var pho = data.substring(start, end).trim();
        if (pho.length > 0) {
            cb(pho);
        } else {
            cb(3);
        }
    });
}

/**
 * Look up the phonetic transcription of `word` on iciba.com.
 *
 * The page is scanned for the first `]</span>`; the transcription is the
 * text from the first `[` found within the 64 characters before that marker
 * up to the next `</span>`, so the result includes the surrounding brackets.
 *
 * @param {string} word - Word to look up; it is URL-encoded before use.
 * @param {function(string|number)} cb - Receives the transcription on
 *     success, otherwise a numeric code:
 *     1 = no bracketed transcription found on the page,
 *     4 = the page could not be downloaded (also logs "error").
 */
var get_pho2 = function (word, cb) {
    var END_MARKER = ']</span>';
    var LOOKBACK = 64;
    // Encode the word so that spaces or non-ASCII characters still form a
    // valid request path.
    var url = "http://www.iciba.com/" + encodeURIComponent(word);

    download(url, function (data) {
        if (!data) {
            cb(4);
            console.log("error");
            return;
        }

        // Without the marker there is nothing to anchor on. Searching for
        // "[" from a negative index would silently match the first bracket
        // anywhere on the page.
        var markerAt = data.indexOf(END_MARKER);
        if (markerAt < 0) {
            cb(1);
            return;
        }

        // The opening bracket has to sit before the marker's "]"; a "[" that
        // only appears later belongs to some other piece of the page.
        var start = data.indexOf('[', Math.max(0, markerAt - LOOKBACK));
        if (start < 0 || start > markerAt) {
            cb(1);
            return;
        }

        // A "</span>" is guaranteed at or before markerAt + 1, and the text
        // starts with "[", so the result can be neither missing nor empty.
        var end = data.indexOf('</span>', start);
        cb(data.substring(start, end).trim());
    });
}

/**
 * Report whether `val` counts as a number.
 *
 * The empty string, `null` and `undefined` are never numbers. Everything
 * else is judged by the global `isNaN`, which coerces its argument, so a
 * numeric string such as "12" counts as a number too.
 *
 * @param {*} val - Value to classify.
 * @returns {boolean} `true` when `val` is numeric, `false` otherwise.
 */
function is_num(val) {
    var isBlank = val === "" || val == null;
    return !isBlank && !isNaN(val);
}

// Load the input workbook and keep the `data` of its first entry as the list
// of rows to process (presumably one entry per worksheet — confirm against
// the node-xlsx documentation).
var obj = xlsx.parse("./word.xlsx");
var words = obj[0].data;
// Console progress bar whose total is the number of rows read.
var bar = new progress(' TranslatePhoing [:bar] :current/:total :percent :etas', {
    complete: '=',
    incomplete: ' ',
    width: 40,
    total: words.length
  });
// Index of the row loop_tran is currently working on.
var i = 0;
// Number of failed lookups counted by loop_tran; it is reset there when a row
// is given up on.
var error_time = 0;
/**
 * Store `data` as the rows of the first workbook entry, serialise the
 * workbook and write it to './resut.xls'.
 *
 * The write is asynchronous: a write error is rethrown from the completion
 * callback, otherwise a confirmation line is logged.
 *
 * NOTE(review): the output name looks like a typo for "result"; it is left
 * untouched because renaming it would change where the output lands.
 *
 * @param {Array} data - Rows to put into the first workbook entry.
 */
var save = function (data) {
    obj[0].data = data;
    fs.writeFile('./resut.xls', xlsx.build(obj), function (writeError) {
        if (writeError) {
            throw writeError;
        }
        console.log('Write to xls has finished');
    });
}

/**
 * Process the rows of `words` one at a time, starting at the shared index
 * `i`, and save the sheet once every row has been handled.
 *
 * For each row the word is read from column 1 and looked up with `get_pho`;
 * a successful result is written to column 2. Rows whose column 1 is
 * missing, not a string, or blank are skipped. A failed lookup (numeric
 * result) is retried after 100 ms; after more than five failures the row is
 * skipped. Successful rows are followed by a 200 ms pause before the next
 * request. Every handled or skipped row advances the progress bar once.
 */
var loop_tran = function () {
    // Step over unusable rows in a plain loop. Recursing and then carrying
    // on would launch a second, concurrent lookup chain working on the same
    // shared index.
    while (i < words.length) {
        var cell = words[i] ? words[i][1] : undefined;
        if (typeof cell === 'string' && cell.trim().length > 0) {
            break;
        }
        i++;
        bar.tick();
    }

    if (i >= words.length) {
        save(words);
        return;
    }

    var w = words[i][1].trim();
    // Ignore a second invocation of the callback for the same request, so a
    // late duplicate can neither double-advance `i` nor fork the chain.
    var handled = false;

    get_pho(w, function (e) {
        if (handled) {
            return;
        }
        handled = true;

        if (!is_num(e)) {
            words[i][2] = e;
            i++;
            // Failures are counted per word; do not let earlier retries eat
            // into the next word's budget.
            error_time = 0;
            bar.tick();
            setTimeout(loop_tran, 200);
            return;
        }

        error_time++;
        if (error_time > 5) {
            // Give up on this row and move on immediately.
            i++;
            error_time = 0;
            bar.tick();
            loop_tran();
            return;
        }
        setTimeout(loop_tran, 100);
    });
}

// Start processing from the current row index; loop_tran keeps rescheduling
// itself until every row has been handled.
loop_tran();

 

标签: JavaScript

发表评论:

本站由emlog驱动 粤ICP备15042739号