官术网_书友最值得收藏!

3.2.11 寫索引集成到爬蟲

爬蟲通過下面的IndexDao類,把抓取到的商品信息(GoodsInfo)轉換成Lucene文檔并寫入索引:

        public class IndexDao {
            private IndexWriter indexWriter;


            public IndexDao(){
                try {
                    Directory directory = FSDirectory.open(new
        File("d:/lietu/index"));
                    Analyzer analyzer = new StandardAnalyzer();
                    indexWriter = new IndexWriter(directory, analyzer,
                                MaxFieldLength.LIMITED);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }


            public void save(GoodsInfo goodsInfo){
                Document doc = goodsInfo2Document(goodsInfo);


                try{
                    indexWriter.addDocument(doc);
                }catch(Exception e){
                    e.printStackTrace();
                }
            }


            public void close(){
                try {
                    indexWriter.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }


            public Document goodsInfo2Document(GoodsInfo ti) {
                Document doc = new Document();
                Field f = new Field("url", ti.getGoodsNameURL(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED, Field.TermVector.NO);
                doc.add(f);


                f = new Field("title", ti.getGoodsName(), Field.Store.YES,
                              Field.Index.ANALYZED,
                        Field.TermVector.WITH_POSITIONS_OFFSETS);
                doc.add(f);
    if (ti.getGoodsDescription() ! = null) {
        f = new Field("body", ti.getGoodsDescription(), Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO);
        doc.add(f);
    }


    f = new Field("date", DateTools.dateToString(new Date(),
            DateTools.Resolution.DAY), Field.Store.YES,
            Field.Index.NOT_ANALYZED, Field.TermVector.NO);
    doc.add(f);


    f = new Field("priceInt", String.valueOf(ti.getPriceInteger()),
            Field.Store.YES, Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);


    if (ti.getMoneyUnit() ! = null) {
        f = new Field("moneyUnit", ti.getMoneyUnit(), Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO);
        doc.add(f);
    }


    try {
        URL website = new URL(ti.getGoodsNameURL().toString());
        f = new Field("fromwebsite", website.getHost(), Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO);
        doc.add(f);
    } catch (MalformedURLException e1) {
        System.out.println("error url =" + ti.getGoodsNameURL().toString());
        e1.printStackTrace();
    }


    // 分類
    f = new Field("category", ti.getGoodsType(), Field.Store.YES,
            Field.Index.NOT_ANALYZED, Field.TermVector.NO);
    doc.add(f);


    // img
    if (ti.getImage() ! = null) {
        f = new Field("img", ti.getImage(), Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO);
        doc.add(f);
    }
    // 制造廠名稱
    if (ti.getMfrName() ! = null) {
        f = new Field("brand", ti.getMfrName(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED, Field.TermVector.NO);
                doc.add(f);
            }
            // 商品型號 序列號
            if (ti.getMfrNumber() ! = null) {
                f = new Field("type", ti.getMfrNumber(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED, Field.TermVector.NO);
                doc.add(f);
            }


            // 價格
            if (ti.getGoodsPrice() ! = null) {
                f = new Field("price", ti.getGoodsPrice(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED, Field.TermVector.NO);
                doc.add(f);
            }
            return doc;
        }
    }
主站蜘蛛池模板: 图木舒克市| 奉贤区| 莱阳市| 临海市| 萍乡市| 金昌市| 榕江县| 龙里县| 南华县| 宁都县| 太仓市| 静乐县| 安国市| 神木县| 霍邱县| 府谷县| 金坛市| 龙川县| 徐州市| 丽江市| 凤阳县| 楚雄市| 牡丹江市| 安多县| 清远市| 奉贤区| 安乡县| 泸州市| 和龙市| 剑川县| 陇西县| 朝阳市| 霞浦县| 永平县| 皮山县| 碌曲县| 桑日县| 乌鲁木齐县| 乌兰浩特市| 扬州市| 嘉禾县|