一、项目准备
1.我们还是采用前面的springboot框架进行本次项目开发;
2.相关前端的资源放入网盘中自行下载:百度网盘 请输入提取码
3.将网盘中前端资源放入springboot中,位置如下:
其中application.properties中配置了项目的访问端口,并关闭了thymeleaf的模板缓存(便于开发时修改页面后立即生效),端口号可以自行修改。
# 应用服务 WEB 访问端口
server.port=8080
# 关闭thymeleaf缓存
spring.thymeleaf.cache=false
4.创建基本包文件:controller、service、unitls(工具类包,与下文代码中的包名保持一致),另外将前端所需要的vue以及axios包放入项目中,结构如下:
5.项目中添加相关依赖:
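<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>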
二、后端代码
1、unitls层(工具类)
package com.elasticsearch.esapi.unitls;

import com.elasticsearch.esapi.pojo.JDGoodsInfo;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Utility class that fetches a JD search result page and extracts goods information from its HTML.
 */
public class HtmlParEs {

    /** Base search URL; the URL-encoded keyword is appended to it. */
    private static final String URLJD = "https://search.jd.com/Search?keyword=";

    /**
     * Fetches the search page for the given keyword and converts every {@code <li>} found under
     * the element with id {@code J_goodsList} into a {@link JDGoodsInfo}.
     *
     * @param param the search keyword (not yet URL-encoded)
     * @return the parsed goods, in page order; an empty list when the page does not contain the
     *         {@code J_goodsList} element
     * @throws IOException if the keyword cannot be encoded or the page cannot be fetched
     */
    public static List<JDGoodsInfo> paraHtml(String param) throws IOException {
        String encodedKeyword = URLEncoder.encode(param, "utf-8");

        // 1. Fetch the page with jsoup (3000 ms timeout) and parse it into a document.
        Document document = Jsoup.parse(new URL(URLJD + encodedKeyword), 3000);

        // 2. Locate the container of the goods list by its id.
        Element element = document.getElementById("J_goodsList");

        List<JDGoodsInfo> goodsList = new ArrayList<>();
        if (element == null) {
            // getElementById returns null when the id is absent; avoid a NullPointerException
            // and report "nothing found" instead.
            return goodsList;
        }

        // 3. Every <li> under the container is treated as one goods entry.
        Elements lis = element.getElementsByTag("li");

        // 4. Extract image, price and title from each entry.
        for (Element li : lis) {
            // The image URL is read from the "data-lazy-img" attribute of the first <img>.
            String img = li.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = li.getElementsByClass("p-price").eq(0).text();
            // text() returns the combined text of the element and all of its children.
            String title = li.getElementsByClass("p-name").eq(0).text();
            goodsList.add(new JDGoodsInfo(img, price, title));
        }
        return goodsList;
    }

    /** Manual smoke test: prints the goods parsed for a sample keyword. */
    public static void main(String[] args) throws IOException {
        HtmlParEs.paraHtml("学习").forEach(System.out::println);
    }
}
2、service层代码:
package com.elasticsearch.esapi.service;import com.alibaba.fastjson.JSON;import com.elasticsearch.esapi.pojo.JDGoodsInfo;import com.elasticsearch.esapi.unitls.HtmlParEs;import org.elasticsearch.action.bulk.BulkRequest;import org.elasticsearch.action.bulk.BulkResponse;import org.elasticsearch.action.get.GetRequest;import org.elasticsearch.action.get.GetResponse;import org.elasticsearch.action.index.IndexRequest;import org.elasticsearch.action.index.IndexResponse;import org.elasticsearch.action.search.SearchRequest;import org.elasticsearch.action.search.SearchResponse;import org.elasticsearch.client.RequestOptions;import org.elasticsearch.client.RestHighLevelClient;import org.elasticsearch.common.text.Text;import org.elasticsearch.common.unit.Timevalue;import org.elasticsearch.common.xcontent.XContentType;import org.elasticsearch.index.query.QueryBuilder;import org.elasticsearch.index.query.QueryBuilders;import org.elasticsearch.index.query.TermQueryBuilder;import org.elasticsearch.search.SearchHit;import org.elasticsearch.search.builder.SearchSourceBuilder;import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.beans.factory.annotation.Qualifier;import org.springframework.beans.factory.annotation.Value;import org.springframework.stereotype.Service;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.Map;import java.util.concurrent.TimeUnit;@Servicepublic class JDGoodsService { @Autowired private RestHighLevelClient restHighLevelClient; @Value("${es.name.one}") public String es1; // 解析网页获取数据,并且将数据放入es库 public Boolean parseHtmlAndSaveEs(String keyWord) throws IOException { // 1.获取页面数据 List jdGoodsInfos = HtmlParEs.paraHtml(keyWord); //2、批量保存至es库 BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m"); for (int i = 0; i < 
jdGoodsInfos.size(); i++) { bulkRequest.add(new IndexRequest(es1) .source(JSON.toJSonString(jdGoodsInfos.get(i)), XContentType.JSON)); } BulkResponse index = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); return !index.hasFailures(); } // 查询es数据,响应前端 public List
3.controller 层
package com.elasticsearch.esapi.controller;

import com.elasticsearch.esapi.service.JDGoodsService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * HTTP endpoints for importing goods and querying them, delegating to {@link JDGoodsService}.
 */
@Controller
public class JDGoodsController {

    @Autowired
    private JDGoodsService jdGoodsService;

    /**
     * Delegates to {@code JDGoodsService.parseHtmlAndSaveEs} for the given keyword.
     *
     * @param keyWord the search keyword
     * @return the value returned by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/parseSaveEs")
    @ResponseBody
    public Boolean parseSaveEs(String keyWord) throws IOException {
        return jdGoodsService.parseHtmlAndSaveEs(keyWord);
    }

    /**
     * Delegates to {@code JDGoodsService.searchGoods} with the given keyword and paging values.
     *
     * @param keyWord  the search keyword
     * @param pageNo   page number passed through to the service
     * @param pageSize page size passed through to the service
     * @return the list returned by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/getGoods")
    @ResponseBody
    public List<Map<String, Object>> getGoods(String keyWord, int pageNo, int pageSize)
            throws IOException {
        // NOTE(review): element type assumed to be Map<String, Object>; confirm against the
        // return type of JDGoodsService.searchGoods.
        return jdGoodsService.searchGoods(keyWord, pageNo, pageSize);
    }

    /**
     * Delegates to {@code JDGoodsService.searchGoods} with the given keyword and paging values.
     *
     * @param keyWord  the search keyword
     * @param pageNo   page number passed through to the service
     * @param pageSize page size passed through to the service
     * @return the list returned by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/getGoodsHight")
    @ResponseBody
    public List<Map<String, Object>> getGoodsHight(String keyWord, int pageNo, int pageSize)
            throws IOException {
        // NOTE(review): this calls the same searchGoods method as /getGoods; if the service has a
        // dedicated highlighting search, this endpoint presumably should call it - confirm.
        return jdGoodsService.searchGoods(keyWord, pageNo, pageSize);
    }
}
package com.elasticsearch.esapi.controller;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;

/**
 * Maps the root and {@code /index} paths to the "index" view name.
 */
@Controller
public class IndexController {

    /** View name returned for the mapped paths. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping(value = {"/", "/index"})
    public String getIndex() {
        return INDEX_VIEW;
    }
}
三、启动springboot项目
访问:http://localhost:8080/ 即可看到对应的前端页面
四、前端代码
在index.html页面添加代码如下
五、总结
1.本次仿的京东项目,首先是将京东的商品数据爬取出来,然后存入es库中;然后再从es库中将数据拿出来,响应给前端。