上次搞全文检索的东西,还要追溯到2010年了,当时个人觉得coreseek还不成熟,就用了原始的sphinx,请参考:sphinx mmseg mysql 中文分词。这次决定用一下coreseek,看看有什么区别。
一,安装环境和工具
服务器用的是centos6.5 x86
coreseek 4.0.1下载地址:http://www.coreseek.cn/uploads/csft/4.0/coreseek-4.0.1-beta.tar.gz
mysql 5.5.8下载地址:http://downloads.mysql.com/archives/community/
二,mmseg中文分词安装
1,旧版本安装方法
- # tar zxvf coreseek-4.0.1-beta.tar.gz
- # cd coreseek-4.0.1-beta/mmseg-3.2.14/
- # aclocal
- # libtoolize --force
- # automake --add-missing
- # autoconf
- # autoheader
- # make clean
- # ./configure --prefix=/usr/local/mmseg3
- # make
- # make install
2,新版本安装方法
- # ./bootstrap
- # ./configure --prefix=/usr/local/mmseg3
- # make
- # make install
mmseg-3.2.14,二种方法都可以安装成功,在编译时有报warning,但不影响安装。
3,新版本mmseg安装后,分词词典(uni.lib)就已经生成好了,不需要再用mmseg命令去生成
- [root@localhost mmseg-3.2.14]# ll /usr/local/mmseg3/etc/
- 总用量 5432
- -rwxr-xr-x 1 root root 229 7月 31 01:44 mmseg.ini
- -rwxr-xr-x 1 root root 1826251 7月 31 01:44 unigram.txt
- -rwxr-xr-x 1 root root 3729280 7月 31 01:44 uni.lib
4,mmseg.ini配置的官方说明
- [mmseg]
- merge_number_and_ascii=0; #合并英文和数字 abc123/x
- number_and_ascii_joint=-; #定义可以连接英文和数字的字符
- compress_space=1; #暂不支持
- seperate_number_ascii=0; #就是将字母和数字打散
三,csft(sphinx)安装
1,旧版安装
- # cp /coreseek的解压目录/mmseg-3.2.14/src/*/*.h /usr/local/mmseg3/include/mmseg/
- # cd /home/tank/download/coreseek-4.0.1-beta
- # aclocal
- # libtoolize --force
- # automake --add-missing
- # autoconf
- # autoheader
- # perl -pi -e 's/lpthread/lpthread -liconv/g' src/Makefile*
- # make clean
- # ./configure --prefix=/usr/local/sphinx --enable-id64 \
- --with-mysql=/usr/local/mysql2 --with-mmseg \
- --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ \
- --with-mmseg-libs=/usr/local/mmseg3/lib/
- # make && make install
2,新版安装
- # cd /home/tank/download/coreseek-4.0.1-beta
- # ./buildconf.sh
- # ./configure --prefix=/usr/local/sphinx --enable-id64 \
- --with-mysql=/usr/local/mysql2 --with-mmseg \
- --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ \
- --with-mmseg-libs=/usr/local/mmseg3/lib/
- # make && make install
coreseek-4.0.1,二种安装方式都是可以的,但是安装时,还是会报错。
四,mysql插件 sphinxse的安装
1,将mysqlse复制到mysql程序目录storage
- # cp -R coreseek-4.0.1-beta/csft-4.0.1-beta/mysqlse /home/tank/download/mysql-5.5.8/storage/sphinx
2,cmake安装mysql 5.5.8
- # cd /home/tank/download/mysql-5.5.8
- # cmake -DCMAKE_INSTALL_PREFIX=/usr/local/mysql2 \
- -DMYSQL_UNIX_ADDR=/usr/local/mysql2/mysql.sock \
- -DCMAKE_BUILD_TYPE=Release -DWITH_SPHINX_STORAGE_ENGINE=1 \
- -DDEFAULT_CHARSET=utf8 \
- -DDEFAULT_COLLATION=utf8_general_ci \
- -DWITH_MYISAM_STORAGE_ENGINE=1 \
- -DWITH_INNOBASE_STORAGE_ENGINE=1 \
- -DWITH_MEMORY_STORAGE_ENGINE=1 \
- -DWITH_READLINE=1 \
- -DENABLED_LOCAL_INFILE=1 \
- -DMYSQL_DATADIR=/usr/local/mysql2/data \
- -DMYSQL_USER=mysql
- # make && make install
cmake安装mysql高版本,请参考:linux cmake 安装mysql5.5.11,以及更高版本
3,configure安装mysql5.5.8
- # cd /home/tank/download/mysql-5.5.8
- # sh BUILD/autorun.sh
- # ./configure --prefix=/usr/local/mysql2 \
- --with-plugins=partition,innobase,myisam,sphinx \
- --with-charset=utf8
- # make && make install
4,初始化数据库
- # cd /home/tank/download/mysql-5.5.8
- # chmod +x scripts/mysql_install_db
- # scripts/mysql_install_db --basedir=/usr/local/mysql2 \
- --datadir=/usr/local/mysql2/data --user=mysql #初始化数据
- # cp support-files/my-medium.cnf /etc/my.cnf #copy配置文件
- # chown -R mysql:mysql /usr/local/mysql2 #更改权限
- # vim /etc/my.cnf #加上以下内容
- [mysqld]
- basedir = /usr/local/mysql2
- datadir = /usr/local/mysql2/data
- log-error = /usr/local/mysql2/mysql_error.log
- pid-file = /usr/local/mysql2/mysql.pid
- user = mysql
- tmpdir = /tmp
5,启动并登录mysql
- # cd /usr/local/mysql2/support-files/
- # cp ./mysql.server /etc/init.d/mysql5
- # /etc/init.d/mysql5 start #启动服务端
- # /usr/local/mysql2/bin/mysql #客户端连接
6,启用sphinxse
sphinxse
在这里和以前安装sphinxse有一点不同:以前mysql编译安装好了以后,就会有sphinxse,不用再用root登录去install了。
五,配置sphinx.conf
- # cd /usr/local/mysql2/support-files/
- # cp ./mysql.server /etc/init.d/mysql5
- # /etc/init.d/mysql5 start
- # cd /usr/local/sphinx/etc
- # cp sphinx.conf.dist sphinx.conf
- # vim sphinx.conf
- source src1
- {
- type = mysql
- sql_host = localhost
- sql_user = root //用户名改一下
- sql_pass = 111111 //密码改一下,无密码,留空
- sql_db = test
- sql_query_pre = SET NAMES utf8 //这行注释去掉
- 。。。。。。。。。。。省略。。。。。。。。。。。。。。
- }
- 。。。。。。。。。。。省略。。。。。。。。。。。。。。
- index rt //在index里面添加以下三行,加入中文分词功能
- {
- type = rt
- path = /usr/local/sphinx/var/data/rt
- charset_dictpath = /usr/local/mmseg3/etc/ //添加
- charset_type = zh_cn.utf-8 //添加
- ngram_len = 0 //添加
- rt_field = title
- rt_field = content
- rt_attr_uint = gid
- }
- 。。。。。。。。。。。省略。。。。。。。。。。。。。。
六,启动sphinx
- //启动indexer
- [root@localhost etc]# /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --all
- Coreseek Fulltext 4.0 [ Sphinx 1.11-id64-dev (r2540)]
- Copyright (c) 2007-2011,
- Beijing Choice Software Technologies Inc (http://www.coreseek.com)
- using config file '/usr/local/sphinx/etc/sphinx.conf'...
- indexing index 'test1'...
- collected 4 docs, 0.0 MB
- sorted 0.0 Mhits, 100.0% done
- total 4 docs, 193 bytes
- total 0.012 sec, 15540 bytes/sec, 322.08 docs/sec
- indexing index 'test1stemmed'...
- collected 4 docs, 0.0 MB
- sorted 0.0 Mhits, 100.0% done
- total 4 docs, 193 bytes
- total 0.002 sec, 75097 bytes/sec, 1556.42 docs/sec
- skipping non-plain index 'dist1'...
- skipping non-plain index 'rt'...
- total 6 reads, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
- total 18 writes, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
- //启动searchd
- [root@localhost etc]# /usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf
- Coreseek Fulltext 4.0 [ Sphinx 1.11-id64-dev (r2540)]
- Copyright (c) 2007-2011,
- Beijing Choice Software Technologies Inc (http://www.coreseek.com)
- using config file '/usr/local/sphinx/etc/sphinx.conf'...
- listening on all interfaces, port=9312
- listening on all interfaces, port=9306
- precaching index 'test1'
- precaching index 'test1stemmed'
- WARNING: multiple addresses found for 'localhost', using the first one (ip=127.0.0.1)
- precaching index 'rt'
- precached 3 indexes in 0.001 sec
来源: http://blog.51yip.com/mysql/1641.html