本篇博客主要是 Java 代码. 如需相应的脚本以及 Java 连接 Elasticsearch 的工具类代码, 请移步上一篇博客.
一, 创建连接并执行 Linux 脚本的工具类
- package com.yjlc.platform.utils.Elasticsearch;
- import ch.ethz.ssh2.Connection;
- import ch.ethz.ssh2.StreamGobbler;
- import java.io.*;
- /**
- * --------------------------------------------------------------
- * CopyRights(c)2018,YJLC
- * All Rights Reserved
- * <p>
- * FileName: SingletonUtil.java
- * Description:
- * Author: cyb
- * CreateDate: 2018-11-15
- * --------------------------------------------------------------
- */
- public class SingletonUtil {
- // 无参构造
- private SingletonUtil(){}
- private volatile static SingletonUtil instance;
- // 字符编码默认是 utf-8
- public static String DEFAULTCHART="UTF-8";
- public static Connection conn;
- private String ip;
- private String userName;
- private String userPwd;
- public static Boolean flag=false;
- // 有参构造
- public SingletonUtil(String ip, String userName, String userPwd) {
- this.ip = ip;
- this.userName = userName;
- this.userPwd = userPwd;
- }
- public SingletonUtil getInstance(String ip, String userName, String userPwd){
- if(instance==null){
- synchronized(SingletonUtil.class){
- // 防止多线程多次创建
- if(instance==null){
- instance=new SingletonUtil(ip,userName, userPwd);
- }
- }
- }
- flag= instance.login();// 调用登录方法
- return instance;
- }
- // 登录
- public Boolean login(){
- boolean flg=false;
- try {
- System.out.println("进入连接");
- conn = new Connection(ip);
- try {
- conn.connect();// 连接
- } catch (IOException e) {
- e.printStackTrace();
- }
- flg=conn.authenticateWithPassword(userName, userPwd);// 认证
- if (flg){
- System.out.println("认证成功!");
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return flg;
- }
- /**
- *@description: 纯文本格式返回
- *@author:cyb
- *@date: 2018-11-15 16:56
- *@param: in
- *@param: charset
- *@return: java.lang.String
- */
- public static String processStdout(InputStream in, String charset){
- InputStream stdout = new StreamGobbler(in);
- StringBuffer buffer = new StringBuffer();;
- try {
- BufferedReader br = new BufferedReader(new InputStreamReader(stdout,charset));
- String line=null;
- while((line=br.readLine()) != null){
- buffer.append(line+"\n");
- }
- } catch (UnsupportedEncodingException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return buffer.toString();
- }
- }
二, 控制层
- /**
-  * Toggles the stored execute state of a crawl task and then starts the crawler.
-  *
-  * @author cyb
-  * @date 2018-11-14 15:59
-  * @param id      id of the task to load
-  * @param execute current state sent by the client; the task is saved with the opposite state
-  *                ({@code null} is treated as {@code false})
-  * @return map holding a "code" entry and a "message" entry describing the outcome
-  */
- @RequestMapping("openTask")
- @ResponseBody
- public Map<String,Object> openTask(String id,Boolean execute){
-     Map<String,Object> map = new HashMap<>();
-     // Load the task details by id.
-     BsKnowledgeInfoDTO knowledgeInfoDTO = knolegeService.getDataInfoById(id);
-     // Flip the state. Boolean.TRUE.equals avoids the NullPointerException that unboxing
-     // a missing request parameter would cause.
-     boolean nextState = !Boolean.TRUE.equals(execute);
-     knowledgeInfoDTO.setExecute(nextState);// update the task state (on / off)
-     knolegeService.updateDataInfo(knowledgeInfoDTO);
-
-     // TODO: the crawl target is hard-coded; presumably it should come from knowledgeInfoDTO.getPath() - confirm.
-     String url = "https://mil.news.sina.com.cn/";
-     String reptileMethod = "http://192.168.200.8:8000/news";// crawler endpoint
-     String themeid = "hottopic";// name of the index the results are stored in
-     // Expected shape:
-     // http://192.168.200.8:8000/news?themeid=hottopic&url=https://mil.news.sina.com.cn/history/2018-11-15/doc-ihmutuec0443667.shtml
-     // The key must be "themeid" as in the example above (it was previously misspelled "themid").
-     String path = reptileMethod + "?themeid=" + themeid + "&url=" + url;
-
-     // NOTE(review): host and root credentials are hard-coded in source; move them to protected configuration.
-     String ip = "192.168.200.8";// address of the Linux host
-     String userName = "root";
-     String userPwd = "yjlc20148";
-
-     int w = knolegeService.reptile(path, ip, userName, userPwd);
-     if (w == 200) {
-         map.put("code", 200);
-         map.put("message", "爬虫成功!");
-     } else if (w == 206) {
-         map.put("code", 206);
-         map.put("message", "连接失败!");
-     } else {
-         // Any other status still produces a non-empty response.
-         map.put("code", w);
-         map.put("message", "爬虫失败!");
-     }
-     return map;
- }
三, service 层 (此处省略了 service 接口层)
- /**
- *@description: 爬虫
- *@author:cyb
- *@date: 2018-11-15 20:52
- *@param: path 爬虫方法路径 + ES 存储索引 + 爬虫目标 url 合集
- *@param: ip 连接 ip 地址
- *@param: userName : 用户名
- *@param: userPwd: 用户密码
- *@return: int
- */
- @Override
- public int reptile(String path,String ip,String userName,String userPwd) {
- SingletonUtil singletonUtil = new SingletonUtil("192.168.200.8", "root","yjlc20148");
- singletonUtil.getInstance(ip, userName,userPwd);
- Boolean b =SingletonUtil.flag;// 看是否连接成功
- if(b==true){
- System.out.println("===== 第一个步骤 =====");
- Session session= null;// 打开一个会话
- try {
- session = singletonUtil.conn.openSession();
- session.execCommand("sh /opt/zc/linux_sina.sh");// 执行命令
- } catch (IOException e) {
- e.printStackTrace();
- }
- //TODO: 多条命令
- String result=singletonUtil.processStdout(session.getStdout(),singletonUtil.DEFAULTCHART);
- // 如果为得到标准输出为空, 说明脚本执行出错了
- if(StringUtils.isBlank(result)){
- System.out.println("脚本出错");
- result=singletonUtil.processStdout(session.getStderr(),singletonUtil.DEFAULTCHART);
- }
- System.out.println("第一个步骤脚本运行成功"+result);
- ConnectNetworkUtil connectNetworkUtil = new ConnectNetworkUtil();
- connectNetworkUtil.ConnectNetwork(path);
- System.out.println("采集成功!");
- session.close();// 关闭 session
- singletonUtil.conn.close();// 爬虫关闭连接
- return 200;// 爬虫成功
- }else {
- return 206;// 连接失败
- }
- }
以上代码省略了 service 接口层和 Java 连接 Elasticsearch 的工具类 (上一篇博客中已给出). 代码仅供参考, 若其中有不合理或者不规范的地方, 欢迎各位指出. 技术在于交流!
来源: http://www.bubuko.com/infodetail-2850640.html