Basic Use of Chinese Word Segmentation in Lucene 8.x
This article demonstrates the basics only through a simple Document-based example.
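Before the indexing example, here is a minimal sketch (not part of the original article; the class name AnalyzerDemo, the field name "content", and the sample sentence are chosen purely for illustration) of what SmartChineseAnalyzer does with a Chinese sentence: it reads the text through a TokenStream and prints one segmented term per line, showing that the analyzer produces word-level terms rather than single characters.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;

public class AnalyzerDemo {
    public static void main(String[] args) throws IOException {
        // SmartChineseAnalyzer segments Chinese text into words instead of single characters
        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             TokenStream tokenStream = analyzer.tokenStream("content", "廣州在哪里怎么走,有什么好玩的地方")) {
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(term.toString()); // one segmented term per line
            }
            tokenStream.end();
        }
    }
}

The exact terms depend on the analyzer's dictionary; the point is that the query text and the indexed text in the example below are segmented by the same analyzer, which is why the Chinese query can match the indexed content.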
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Paths;

/**
 * @author liangzh
 * @create 2021-05-05-15:51
 */
public class Lucene_Index_CRUD {

    public static void main(String[] args) {
        // Add documents to the index first, then search/update/delete as needed
        // addIndex();
        searchIndex();
        // updateIndex();
        // deleteIndex();
    }

    private static void searchIndex() {
        try {
            Directory directory = FSDirectory.open(Paths.get("Lucene_db"));
            IndexReader indexReader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // A TermQuery does not analyze the keyword:
            // Query query = new TermQuery(new Term("title", "title1"));
            SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
            QueryParser queryParser = new QueryParser("title", analyzer);
            Query query = queryParser.parse("廣州");

            TopDocs topDocs = indexSearcher.search(query, 10);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            System.out.println("Number of hits: " + scoreDocs.length);
            for (int i = 0; i < scoreDocs.length; i++) {
                int id = scoreDocs[i].doc;
                float score = scoreDocs[i].score;
                System.out.println("id:" + id + "; score:" + score);
                Document document = indexSearcher.doc(id);
                String articleId = document.get("articleId");
                String title = document.get("title");
                String content = document.get("content");
                System.out.println("articleId: " + articleId + "; title:" + title + "; content:" + content);
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }

    private static void deleteIndex() {
        try {
            // Location of the index directory
            Directory directory = FSDirectory.open(Paths.get("Lucene_db"));
            // Create the analyzer; SmartChineseAnalyzer is used here
            Analyzer analyzer = new SmartChineseAnalyzer();
            // IndexWriterConfig configures how the index is created/opened
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            // IndexWriter performs add, delete and update operations on the index
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
            // Delete a single document
            indexWriter.deleteDocuments(new Term("articleId", "0002"));
            // Delete everything:
            // indexWriter.deleteAll();
            // Commit the changes
            indexWriter.commit();
            // Close the writer
            indexWriter.close();
            System.out.println("========== index deleted successfully ==========");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // An update deletes the old document and adds the new one
    private static void updateIndex() {
        try {
            // Location of the index directory
            Directory directory = FSDirectory.open(Paths.get("Lucene_db"));
            // Create the analyzer; SmartChineseAnalyzer is used here
            Analyzer analyzer = new SmartChineseAnalyzer();
            // IndexWriterConfig configures how the index is created/opened
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            // IndexWriter performs add, delete and update operations on the index
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
            // In Lucene, one Document instance represents one record
            Document document2 = new Document();
            // StringField is indexed as a single token (not analyzed);
            // Store.YES means the original value is stored and can be read back from search results
            document2.add(new StringField("articleId", "0002", Field.Store.YES));
            document2.add(new TextField("title", "title1", Field.Store.YES));
            document2.add(new TextField("content", "廣州在哪里怎么走,有什么好玩的地方", Field.Store.YES));
            // Replace the document matching the Term with the new document
            indexWriter.updateDocument(new Term("articleId", "0001"), document2);
            // Commit the changes
            indexWriter.commit();
            // Close the writer
            indexWriter.close();
            System.out.println("========== index updated successfully ==========");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private static void addIndex() {
        try {
            // Location of the index directory
            Directory directory = FSDirectory.open(Paths.get("Lucene_db"));
            // Create the analyzer; here the Chinese analyzer is used
            SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
            // IndexWriterConfig configures how the index is created/opened
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            // IndexWriter performs add, delete and update operations on the index
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
            // In Lucene, one Document instance represents one record
            Document document = new Document();
            // StringField is indexed as a single token (not analyzed);
            // Store.YES means the original value is stored and can be read back from search results
            document.add(new StringField("articleId", "0003", Field.Store.YES));
            document.add(new TextField("title", "廣州怎么走", Field.Store.YES));
            document.add(new TextField("content", "廣州在哪里怎么走,有什么好玩的地方", Field.Store.YES));
            // Write the document to the index
            indexWriter.addDocument(document);
            // Commit the changes
            indexWriter.commit();
            // Close the writer
            indexWriter.close();
            System.out.println("========== index added successfully ==========");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
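Note: besides lucene-core, the example assumes the lucene-queryparser and lucene-analyzers-smartcn artifacts (SmartChineseAnalyzer ships in the smartcn module) are on the classpath at a matching 8.x version; the original article does not list the exact build coordinates. To try it end to end, run addIndex() first so that searchIndex() has documents in Lucene_db to match.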