大学IT网 - 最懂大学生的IT学习网站! QQ资料交流群:367606806
当前位置:大学IT网 > ASP.NET技巧 > Lucene.net入门学习(结合盘古分词)

Lucene.net入门学习(结合盘古分词)(2)

关键词:Lucene.net盘古分词  阅读(1173) 赞(17)

[摘要]本文是对Lucene.net入门学习(结合盘古分词)的讲解,对学习ASP.NET编程技术有所帮助,与大家分享。

如代码所示,D:\Lucene\post\ 目录用于存储 Lucene.net 生成的索引文件,如下图:

这些索引文件保存了 PostInfo 表中 PostId、Title、PostScore 三个字段的信息。

需要注意的是:使用盘古分词操作时,需要将PanGu.xml和盘古分词自带的分词文件放入项目中,如下图:

Lucene.net执行搜索(结合盘古分词)

namespace LuceneNetStudy.Search
{
    /* Original author: 释迦苦僧 */

    /// <summary>
    /// Search form: tokenizes the entered keyword with PanGu, queries the
    /// Lucene.net index on the "Title" field, sorts hits by "PostScore"
    /// (descending) and shows the first page of results.
    /// </summary>
    public partial class MainForm : Form
    {
        /// <summary>Directory that holds the Lucene.net index files.</summary>
        private string IndexDic = @"D:\Lucene\post\";

        public MainForm()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts a background worker that runs the search for the trimmed
        /// text of the keyword box, so the UI thread is not blocked.
        /// </summary>
        private void btnSearch_Click(object sender, EventArgs e)
        {
            BackgroundWorker backWorker = new BackgroundWorker();
            backWorker.DoWork += new DoWorkEventHandler(backWorker_DoWork);
            backWorker.RunWorkerAsync(txtKey.Text.Trim());
        }

        /// <summary>
        /// Worker body: runs the search described by <c>e.Argument</c> (the
        /// keyword string) and marshals the results back to the UI thread.
        /// </summary>
        void backWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            // Number of hits displayed per search.
            const int pageSize = 10;

            string key = e.Argument as string;

            List<PostInfo> result = new List<PostInfo>();

            // Measure how long the whole search takes.
            Stopwatch sw = new Stopwatch();
            sw.Start();

            // "using" guarantees the searcher is released even when
            // tokenizing, parsing or searching throws.
            using (IndexSearcher search = new IndexSearcher(FSDirectory.Open(IndexDic), true))
            {
                // Split the keyword into space-separated terms with PanGu.
                key = GetKeyWordsSplitBySpace(key, new PanGuTokenizer());
                BooleanQuery bq = new BooleanQuery();
                if (!string.IsNullOrEmpty(key))
                {
                    // A non-empty keyword is searched in the "Title" field.
                    QueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Title" }, new PanGuAnalyzer());
                    Query query = queryParser.Parse(key);
                    bq.Add(query, Occur.MUST);
                }

                // Sort by the "PostScore" field, highest score first.
                SortField byScoreDescending = new SortField("PostScore", SortField.DOUBLE, true);
                Sort sort = new Sort(new SortField[] { byScoreDescending });

                // Only the first page is displayed, so only that many hits are
                // requested instead of collecting every document in the index.
                TopFieldDocs docs = search.Search(bq, null, pageSize, sort);

                foreach (ScoreDoc sd in TopDocs(0, pageSize, docs))
                {
                    Document doc = search.Doc(sd.Doc);

                    // Best effort: a document whose stored fields are missing
                    // or malformed is skipped (and logged) rather than
                    // aborting the whole search.
                    Guid postId;
                    double postScore;
                    if (!Guid.TryParse(doc.Get("PostId"), out postId) ||
                        !double.TryParse(doc.Get("PostScore"), out postScore))
                    {
                        Trace.TraceWarning("Skipping document {0}: invalid PostId or PostScore.", sd.Doc);
                        continue;
                    }

                    var model = new PostInfo();
                    model.PostId = postId;
                    model.PostScore = postScore;
                    model.Title = doc.Get("Title");
                    result.Add(model);
                }
            }

            sw.Stop();

            // Controls may only be touched on the UI thread.
            Invoke(new MethodInvoker(delegate()
            {
                lblRunTime.Text = "花费: " + sw.Elapsed;

                // Build the text once instead of re-assigning the control's
                // Text for every row.
                StringBuilder text = new StringBuilder();
                foreach (PostInfo info in result)
                {
                    text.Append(info.PostScore + "\t" + info.Title + "\r\n");
                }
                txtResult.Text = text.ToString();
            }));
        }

        /// <summary>
        /// Returns one page of hits from <paramref name="docs"/>.
        /// </summary>
        /// <param name="start">Zero-based index of the first hit to return.</param>
        /// <param name="limit">Maximum number of hits to return.</param>
        /// <param name="docs">Search result to page through.</param>
        /// <returns>
        /// The hits in the range [start, start + limit), truncated to the hits
        /// actually available; empty when <paramref name="start"/> is past the end.
        /// </returns>
        public static ScoreDoc[] TopDocs(int start, int limit, TopFieldDocs docs)
        {
            ScoreDoc[] available = docs.ScoreDocs;
            int hc = docs.TotalHits;

            int endIndex = (hc - start > limit) ? start + limit : hc;

            // TotalHits counts every match, but ScoreDocs only holds as many
            // hits as the search requested; never index past what is there.
            endIndex = Math.Min(endIndex, available.Length);

            List<ScoreDoc> page = new List<ScoreDoc>();
            for (int i = start; i < endIndex; i++)
            {
                page.Add(available[i]);
            }
            return page.ToArray();
        }

        /// <summary>
        /// Segments <paramref name="keywords"/> with the given tokenizer and
        /// joins the resulting words with single spaces.
        /// </summary>
        /// <param name="keywords">Raw keyword text entered by the user.</param>
        /// <param name="ktTokenizer">PanGu tokenizer used for segmentation.</param>
        /// <returns>
        /// The space-separated words, trimmed; an empty string when
        /// <paramref name="keywords"/> is null or empty.
        /// </returns>
        public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
        {
            if (string.IsNullOrEmpty(keywords))
            {
                return string.Empty;
            }

            StringBuilder result = new StringBuilder();

            // One keyword may be split into several words and single characters.
            ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
            if (words == null)
            {
                return string.Empty;
            }

            foreach (WordInfo word in words)
            {
                if (word == null)
                {
                    continue;
                }

                result.Append(word.Word).Append(' ');
            }

            return result.ToString().Trim();
        }
    }
}
«上一页12下一页»


相关评论