My favorites | Sign in
Project Logo
                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#! /usr/bin/env python
#
# couchdb-xapian-indexer - Index couchdb databases with Xapian.
# Copyright (C) 2008 Paul J. Davis <paul.joseph.davis@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

import logging
import os
import sys
from optparse import OptionParser, make_option

import simplejson
import xapian

log = logging.getLogger(__name__)

class QueryHandler(object):
    """Answer full-text queries against the Xapian index of one database.

    The index is opened from ``<dir>/<dbname>.idx`` and searched through a
    query parser configured with an English stemmer (``STEM_SOME`` strategy).
    """

    def __init__(self, dir, url, dbname):
        """Open the index for *dbname* and prepare the enquire/parser objects.

        dir    -- directory that holds the ``<dbname>.idx`` Xapian databases.
        url    -- accepted for interface compatibility; not used by this class.
        dbname -- database name, used to build the index path.
        """
        self.idxfile = os.path.join(dir, '%s.idx' % dbname)
        self.db = xapian.Database(self.idxfile)
        self.enq = xapian.Enquire(self.db)
        self.qp = xapian.QueryParser()
        self.qp.set_stemmer(xapian.Stem("english"))
        self.qp.set_database(self.db)
        self.qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)

    def query(self, query):
        """Run one search request and return its results.

        query -- request dict; its optional ``'query'`` member may carry
                 ``'q'`` (query string, default ``''``), ``'offset'``
                 (default 0) and ``'count'`` (default 25).

        Returns ``{'docs': [...], 'count': <estimated matches>}`` where each
        doc is ``{'id': <document data>, 'rank': <1-based rank>,
        'score': <percent>}``.

        Raises ValueError if ``offset`` or ``count`` is not an integer value.
        """
        # Reopen before every search so the search runs against the current
        # state of the index rather than the one seen when it was opened.
        self.db.reopen()
        params = query.get('query', {})
        qs = params.get('q', '')
        # Paging values may arrive as strings (e.g. '10') when they come from
        # a URL query string, so coerce them before handing them to Xapian.
        offset = int(params.get('offset', 0))
        count = int(params.get('count', 25))
        self.enq.set_query(self.qp.parse_query(qs))
        mset = self.enq.get_mset(offset, count)
        docs = [
            # Xapian ranks are 0-based; report them 1-based.
            {'id': m.document.get_data(), 'rank': m.rank + 1, 'score': m.percent}
            for m in mset
        ]
        return {'docs': docs, 'count': mset.get_matches_estimated()}

def queries():
    """Yield each JSON request read from standard input, one per line.

    Reading stops when ``readline()`` returns the empty string (end of
    input). Blank or whitespace-only lines are skipped instead of being
    passed to the JSON decoder, which would raise on them.

    Raises whatever ``simplejson.loads`` raises for a malformed line.
    """
    # readline() is called once per request; '' is its end-of-input marker.
    for line in iter(sys.stdin.readline, ''):
        if not line.strip():
            continue
        yield simplejson.loads(line)

def send(data):
    """Emit *data* on standard output as a single JSON-encoded line.

    The stream is flushed after the line is written so the reader on the
    other end of the pipe sees the response immediately.
    """
    payload = simplejson.dumps(data)
    stream = sys.stdout
    stream.write(payload + '\n')
    stream.flush()

def main(dir, url, exclude):
    """Serve search requests from standard input until it is exhausted.

    dir     -- directory holding the Xapian indexes; created if missing.
    url     -- passed through to each QueryHandler.
    exclude -- collection of database names that must not be queried.

    One response is sent per request: ``True`` for a ``['reset']`` message,
    a 404 payload for an excluded database, otherwise a 200 payload carrying
    the search results. Handlers are created on first use and cached per
    database name.
    """
    if not os.path.isdir(dir):
        os.mkdir(dir)
    cache = {}
    for request in queries():
        if request == [u'reset']:
            send(True)
            continue
        log.info("Update: %s" % request)
        name = request['db']
        if name in exclude:
            response = {'code': '404', 'json': {'error': 'Database not indexed.'}, 'headers': {}}
        else:
            if name not in cache:
                cache[name] = QueryHandler(dir, url, name)
            response = {'code': '200', 'json': cache[name].query(request), 'headers': {}}
        send(response)

if __name__ == '__main__':
    # Command-line entry point: parse the options, set up file logging and
    # run the query loop until standard input is closed.
    options = [
        make_option('-d', '--dir', dest='dir', metavar="DIRECTORY", default="./xapian",
            help="Directory in which to store xapian databases. [%default]"),
        make_option('-u', '--url', dest='url', metavar="URL", default="http://localhost:5984",
            help="URL of the couchdb server. [%default]"),
        # action='append' is what makes "can be used multiple times" true;
        # without it the option stores a single string and the membership
        # test in main() silently becomes a substring match.
        make_option('-e', '--exclude', dest='exclude', metavar='DB_NAME', default=[],
            action='append',
            help="Exclude a database from indexing. Can be used multiple times."),
        make_option('-l', '--log', dest='log', metavar="FILE", default='./xapian/query.log',
            help="Name of the log file to write to."),
    ]
    parser = OptionParser("usage: %prog [OPTIONS]", option_list=options)
    opts, args = parser.parse_args()
    if args:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Unrecognized arguments: %s" % ' '.join(args))
        parser.print_help()
        sys.exit(-1)
    # basicConfig opens the log file right away, before main() has had a
    # chance to create the default ./xapian directory, so make sure the
    # log file's directory exists first.
    log_dir = os.path.dirname(os.path.abspath(opts.log))
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    logging.basicConfig(filename=opts.log, level=logging.DEBUG, format="%(levelname)s %(message)s")
    try:
        main(os.path.abspath(opts.dir), opts.url, opts.exclude)
    except Exception:
        # Log the traceback of any failure in the query loop; SystemExit and
        # KeyboardInterrupt are deliberately left to propagate.
        log.exception("Querying shutting down due to high stress. Relaxation needed.")
Show details Hide details

Change log

r27 by paul.joseph.davis on Aug 08, 2008   Diff
Updated the scripts to use the new
_external interface.

Check out http://github.com/davisp/couchdb

Go to: 
Project members, sign in to write a code review

Older revisions

r24 by paul.joseph.davis on Jun 10, 2008   Diff
Added a call to
xapian.Database.reopen() before each
search
so that it'll hopefully not trigger
database modification
...
r22 by paul.joseph.davis on Jun 01, 2008   Diff
Adding the Xapian full text search
query server.

Make sure to check out issue
COUCHDB-74 for current
...
All revisions of this file

File info

Size: 4110 bytes, 106 lines

File properties

svn:executable
*
Hosted by Google Code