日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

Lucene 8.x 中文分词基本使用

發布時間:2024/1/18 编程问答 25 豆豆
生活随笔收集整理的這篇文章主要介紹了 Lucene 8.x 中文分词基本使用,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。

Lucene中文分詞基本使用
本文僅通過 Document 的新增、查詢、更新與刪除,對 Lucene 中文分詞進行簡單示例。

import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.*; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory;import java.io.IOException; import java.nio.file.Paths;/*** @author liangzh* @create 2021-05-05-15:51*/ public class Lucene_Index_CRUD {public static void main(String[] args) {//添加索引到索引庫 // addIndex();searchIndex(); // updateIndex(); // deleteIndex();}private static void searchIndex() {try {Directory directory = FSDirectory.open(Paths.get("Lucene_db"));IndexReader indexReader = DirectoryReader.open(directory);IndexSearcher indexSearcher = new IndexSearcher(indexReader); // //TermQuery 不會對關鍵字進行分詞 // Query query = new TermQuery(new Term("title", "title1"));SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();QueryParser queryParser = new QueryParser("title", analyzer);Query query = queryParser.parse("廣州");TopDocs topDocs = indexSearcher.search(query, 10);ScoreDoc[] scoreDocs = topDocs.scoreDocs;System.out.println("獲取到的記錄數" + scoreDocs.length);for (int i=0;i< scoreDocs.length; i++){int id = scoreDocs[i].doc;float score = scoreDocs[i].score;System.out.println("id:" + id +"; score:" + score);Document document = indexSearcher.doc(id);String articleId = document.get("articleId");String title = document.get("title");String content = document.get("content");System.out.println("articleId: " + articleId + "; title:" + title + "; content:"+ content);}} catch (IOException | ParseException e) {e.printStackTrace();}}private static void deleteIndex() {try 
{//指定索引庫的位置Directory directory = FSDirectory.open(Paths.get("Lucene_db"));//創建分詞器,此處使用單字分詞器Analyzer analyzer = new SmartChineseAnalyzer();//創建IndexWriterConfig實例,通過IndexConfig實例配置索引創建模式IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);//通過IndexWriter進行索引的添加,刪除,更新IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);//刪除某一文件indexWriter.deleteDocuments(new Term("articleId","0002"));//全部刪除//indexWriter.deleteAll();//提交事務indexWriter.commit();//關閉流indexWriter.close();System.out.println("==========索引刪除成功=========");} catch (IOException e) {e.printStackTrace();}}//更新原理是將原來的刪除,然后重新添加private static void updateIndex() {try {//指定索引庫的位置Directory directory = FSDirectory.open(Paths.get("Lucene_db"));//創建分詞器,此處使用單字分詞器Analyzer analyzer = new SmartChineseAnalyzer();//創建IndexWriterConfig實例,通過IndexConfig實例配置索引創建模式IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);//通過IndexWriter進行索引的添加,刪除,更新IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);//在Lucene中,一個Document實例代表一條記錄Document document2 = new Document();//StringFild不會對數據進行分詞// Store.YES:會對數據進行分詞并存儲document2.add(new StringField("articleId", "0002", Field.Store.YES));document2.add(new TextField("title", "title1", Field.Store.YES));document2.add(new TextField("content", "廣州在哪里怎么走,有什么好玩的地方", Field.Store.YES));//將索引寫至索引庫indexWriter.updateDocument(new Term("articleId","0001"),document2);//提交事務indexWriter.commit();//關閉流indexWriter.close();System.out.println("==========索引更新成功=========");} catch (IOException e) {e.printStackTrace();}}private static void addIndex() {try {//指定索引庫的位置Directory directory = FSDirectory.open(Paths.get("Lucene_db"));//創建分詞器,此處使用漢語分詞器SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();//創建IndexWriterConfig實例,通過IndexConfig實例配置索引創建模式IndexWriterConfig indexWriterConfig = new 
IndexWriterConfig(analyzer);indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);//通過IndexWriter進行索引的添加,刪除,更新IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);//在Lucene中,一個Document實例代表一條記錄Document document = new Document();//StringFild不會對數據進行分詞// Store.YES:會對數據進行分詞并存儲document.add(new StringField("articleId", "0003", Field.Store.YES));document.add(new TextField("title", "廣州怎么走", Field.Store.YES));document.add(new TextField("content", "廣州在哪里怎么走,有什么好玩的地方", Field.Store.YES));//將索引寫至索引庫indexWriter.addDocument(document);//提交事務indexWriter.commit();//關閉流indexWriter.close();System.out.println("==========索引添加成功=========");} catch (IOException e) {e.printStackTrace();}} }

總結

以上是生活随笔為你收集整理的Lucene 8.x 中文分词基本使用的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。