ElasticSearch - 03 实战应用
笔记源自观看狂神说 ES 课程:https://www.bilibili.com/video/BV17a4y1x7zq?p=12&spm_id_from=pageDriver
集成SpringBoot
文档地址:
https://www.elastic.co/guide/en/elasticsearch/client/java-rest/7.6/java-rest-high.html
1、pom依赖
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
2、初始化
@Configuration
public class ElasticSearchClientConfig {
@Bean
public RestHighLevelClient restHighLevelClient() {
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("127.0.0.1", 9200, "http")));
return client;
}
}
问题:一定要保证自己引入的依赖和本地环境一致。
3、创建客户端对象
@Configuration
public class ElasticSearchClientConfig {
@Bean
public RestHighLevelClient restHighLevelClient() {
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("127.0.0.1", 9200, "http")));
return client;
}
}
索引 API 测试
1、创建索引
@Autowired
@Qualifier("restHighLevelClient")
private RestHighLevelClient client;
// 测试索引创建 Request
@Test
void testCreateIndex() throws IOException {
// 1、创建索引请求
CreateIndexRequest request = new CreateIndexRequest("matuto_index");
// 2、客户端执行创建请求 IndicesClient,请求后获得响应
CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
System.out.println(createIndexResponse);
}
2、获取索引 - 判断索引是否存在
// 测试获取索引, 只能判断其存不存在
@Test
void testExistIndex() throws IOException {
GetIndexRequest request = new GetIndexRequest("matuto_index");
boolean exists = client.indices().exists(request, RequestOptions.DEFAULT);
System.out.println(exists);
}
3、删除索引
// 测试删除索引
@Test
void deleteIndex() throws IOException {
DeleteIndexRequest request = new DeleteIndexRequest("matuto_index");
AcknowledgedResponse delete = client.indices().delete(request, RequestOptions.DEFAULT);
System.out.println(delete);
}
文档 API 操作
1、测试添加文档
@Test
void testAddDocument() throws IOException {
// 创建对象
User user = new User("马图图", 23);
// 创建请求
IndexRequest request = new IndexRequest("matuto_index");
// 设置规则 put /matuto_index/_doc/1
request.id("1");
request.timeout(TimeValue.timeValueSeconds(1));
// 数据放入请求 json
request.source(JSON.toJSONString(user), XContentType.JSON);
// 客户端发送请求, 获取响应的结果
IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
System.out.println(indexResponse.toString());
System.out.println(indexResponse.status()); // 对应我们命令返回的状态
}
2、获取文档,判断是否存在
@Test
void testIsExists() throws IOException {
GetRequest getRequest = new GetRequest("matuto_index", "1");
// 不获取返回的 _source 的上下文
getRequest.fetchSourceContext(new FetchSourceContext(false));
getRequest.storedFields("_none_");
boolean exists = client.exists(getRequest, RequestOptions.DEFAULT);
System.out.println(exists);
}
3、获取文档的信息
@Test
void testGetDocument() throws IOException {
GetRequest getRequest = new GetRequest("matuto_index", "1");
GetResponse documentFields = client.get(getRequest, RequestOptions.DEFAULT);
// 打印文档的内容
System.out.println(documentFields.getSourceAsString());
System.out.println(documentFields); // 返回的全部内容和命令是一样的
}
4、更新文档的信息
@Test
void testUpdateDocument() throws IOException {
UpdateRequest request = new UpdateRequest("matuto_index", "1");
request.timeout("1s");
User user = new User("马图图小淘气", 18);
request.doc(JSON.toJSONString(user), XContentType.JSON);
UpdateResponse response = client.update(request, RequestOptions.DEFAULT);
System.out.println(response.status());
}
5、删除文档的信息
@Test
void testDeleteDocument() throws IOException {
DeleteRequest request = new DeleteRequest("matuto_index", "1");
request.timeout("1s");
DeleteResponse response = client.delete(request, RequestOptions.DEFAULT);
System.out.println(response.status());
}
6、批量操作(新增)
// 测试批量添加文档
@Test
void testBulkDocument() throws IOException {
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("10s");
ArrayList<User> userList = new ArrayList<>();
userList.add(new User("matuto1", 3));
userList.add(new User("matuto2", 3));
userList.add(new User("matuto3", 3));
userList.add(new User("matuto4", 3));
userList.add(new User("matuto5", 3));
userList.add(new User("matuto6", 3));
// 批处理请i去
for (int i = 0; i < userList.size(); i++) {
// 批量更新和批量删除,就在这里修改对应的请求
bulkRequest.add(new IndexRequest("matuto_index")
.id("" + (i + 1)) // 不设置 id 会生成自动的id
.source(JSON.toJSONString(userList.get(i)),XContentType.JSON));
}
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
// 是否失败,返回 false 代表成功
System.out.println(bulkResponse.hasFailures());
}
7、查询
// 查询
// SearchRequest 搜索请求
// SearchSourceBuilder 条件构造
@Test
void testSearch() throws IOException {
SearchRequest searchRequest = new SearchRequest("matuto_index");
// 构建搜索条件
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
// 查询条件,使用 QueryBuilders 工具类来实现
// QueryBuilders.termQuery 精确匹配
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "matuto1");
// QueryBuilders.matchAllQuery() 匹配所有
//MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
sourceBuilder.query(termQueryBuilder);
// 超时时间
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
// 放入请求
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(JSON.toJSONString(searchResponse.getHits()));
System.out.println("==============================");
for (SearchHit hit: searchResponse.getHits().getHits()) {
System.out.println(hit.getSourceAsMap());
}
}
错误记录
java.lang.IncompatibleClassChangeError: Found interface org.elasticsearch.common.xcontent.ToXContentFragment, but class was expected
执行查询方法后,一直在 18 行报错,百度后发现是FastJson 的jar包与 es 包版本不兼容,这里我用的 es7.6.1 将 fastjson 包的版本改为1.2.62后,解决问题。
实战
1、获取数据
爬取数据:(获取请求返回的页面,筛选出我们想要的数据)
jsoup包
public List<Content> parseJD(String keywords) throws Exception {
// 获得请求 https://search.jd.com/Search?keyword=java
String url = "https://search.jd.com/Search?keyword=" + keywords;
// 解析网页 (Jsoup返回的Document就是Document对象)
Document document = Jsoup.parse(new URL(url), 30000);
// 所有在js中能用的方法都能用
Element element = document.getElementById("J_goodsList");
// 获取所有的 li 元素
Elements elements = element.getElementsByTag("li");
ArrayList<Content> goodsList = new ArrayList<>();
// 获取信息
for (Element el: elements) {
// 关于图片特别多的网站,所有的图片都是延迟加载的! source-data-lazy-img
// data-lazy-img="//img10.360buyimg.com/n1/s200x200_jfs/t1/212423/25/5612/167421/619eec2fE1a9a57a4/6d0e3362f4d01726.jpg"
// String img = el.getElementsByTag("img").eq(0).attr("src");
String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
String price = el.getElementsByClass("p-price").eq(0).text();
String title = el.getElementsByClass("p-name").eq(0).text();
Content content = new Content();
content.setImg(img);
content.setPrice(price);
content.setTitle(title);
goodsList.add(content);
}
return goodsList;
}
2、获取到的数据放入 es 中
public Boolean parseContent(String keywords) throws Exception {
List<Content> contents = new HtmlParseUtil().parseJD(keywords);
// 把查询到的数据放入 es 中
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("2m");
for (int i = 0; i < contents.size(); i++) {
bulkRequest.add(
new IndexRequest("jd_goods")
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
}
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
return !bulk.hasFailures();
}
3、高亮搜索 + 分页
public List<Map<String, Object>> searchPageHighlight(String keyword, int pageNo, int pageSize) throws IOException {
if (pageNo <= 1) {
pageNo = 1;
}
// 条件搜索
SearchRequest searchRequest = new SearchRequest("jd_goods");
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
// 分页功能
sourceBuilder.from(pageNo);
sourceBuilder.size(pageSize);
// 精准匹配
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
sourceBuilder.query(termQueryBuilder);
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
// 高亮
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("title");
// 是否需要多个高亮
highlightBuilder.requireFieldMatch(false);
highlightBuilder.preTags("<span style = 'color:red'>");
highlightBuilder.postTags("</span>");
sourceBuilder.highlighter(highlightBuilder);
// 执行搜索
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// 解析结果
ArrayList<Map<String, Object>> list = new ArrayList<>();
for (SearchHit hit : searchResponse.getHits().getHits()) {
// 解析高亮的字段
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
HighlightField title = highlightFields.get("title");
// 这是原来的结果
Map<String, Object> sourceAsMap = hit.getSourceAsMap();
// 解析高亮的字段,将原来的字段换为我们高亮的字段即可
if (title != null) {
Text[] fragments = title.fragments();
StringBuilder n_title = new StringBuilder();
for (Text text: fragments) {
n_title.append(text);
}
// 将高亮字段替换掉原来的内容即可
sourceAsMap.put("title", n_title);
}
list.add(sourceAsMap);
}
return list;
}