My favorites | Sign in
Project Logo
                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#!/usr/bin/perl

######################################################################
# Swignition/0.1-alpha15 - a toolkit for the semantic web
# Copyright (c) 2008, 2009 Toby Inkster.
######################################################################
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
######################################################################

package Swignition::Daemon;
use Net::Server::PreFork;
@ISA = qw(Net::Server::PreFork);

use strict;
use lib '.';
use Cache::FileCache; # Causing problems?
use Swignition::Export::Contact;
use Swignition::Export::Feed;
use Swignition::Export::Location;
use Swignition::Export::Calendar;
use Swignition::Export::CalComponent;
use Swignition::Export::Recording;
use Swignition::Export::Recipe;
use Swignition::GenericParser;
use Swignition::Misc;
use Getopt::Long
qw(:config no_ignore_case bundling permute);
use HTTP::Cache::Transparent;
use HTTP::Request;
use JSON 2.0;
use LWP::UserAgent;
use Pod::Usage;
use Digest::SHA1 qw(sha1_hex);

BEGIN { $ENV{PERL_JSON_BACKEND} = 'JSON::PP' }

HTTP::Cache::Transparent::init( {
BasePath => "/tmp/swignition.cache",
Verbose => 1,
MaxAge => 4, # hours
NoUpdate => 5, # seconds
});

our $ObjectCache = new Cache::FileCache({
'namespace' => 'Swignition.1',
'cache_root' => '/tmp/swignition.objcache'
});

Swignition::Daemon->run(
conf_file => '/etc/swignition/swignitiond.conf'
);

sub options
{
my $self = shift;
my $prop = $self->{'server'};
my $template = shift;

$self->SUPER::options($template);

$prop->{'format'} ||= undef;
$template->{'format'} = \$prop->{'format'};

foreach my $x (qw(erdf_strict_profiles grddl_fetch grddl_strict_profiles
p_comments p_erdf p_grddl p_rdf p_rdfa p_rdfx p_structure p_uf p_http
rdfa_strict_doctype rdfa_strict_version rdfa_strings uf_strict_profiles
p_role))
{
$prop->{opts}->{$x} ||= undef;
$template->{$x} = \$prop->{opts}->{$x};
}
}

sub process_request
{
my $self = shift;
my $autoclose = 0;
my $output_format = $self->{server}->{'format'};

my %opts = ();
foreach my $k (keys %{ $self->{server}->{opts} })
{ $opts{$k} = $self->{server}->{opts}->{$k}; }

eval
{
local $SIG{ALRM} = sub { die "Timed Out!\n" };
my $timeout = 180; # give the user 3 min to type a line
my $previous_alarm = alarm($timeout);
while( <STDIN> )
{
s/\r?\n$//;

if (/^SET FORMAT (.+)$/i)
{
$output_format = $1;
$self->log(4, "Set output format to '$1'.\n");
}
elsif (/^SET AUTOCLOSE (.+)$/i)
{
$autoclose = $1;
$self->log(4, "Set autoclose to '$1'.\n");
}
elsif (/^SET OPTION (.+) (.+)$/i)
{
$opts{lc($1)} = $2;
$self->log(4, "Set option $1 to '$2'.\n");
}
elsif (/^SET NOFOLLOW (.+)$/i)
{
push @{ $opts{'nofollow'} }, $1;
$self->log(4, "NoFollow for host $1.\n");
}
elsif (/^SHA1 (.+)$/i)
{
my ($pageUrl, $subjectUrl) = Swignition::Misc::url_split($1);
if (defined $subjectUrl)
{ print sha1_hex($subjectUrl)."\n"; }
else
{ print sha1_hex($pageUrl)."\n"; }
}
elsif (/^COGNIFY STDIN AS (.+)$/i
|| /^COGNIFY STDIN$/i)
{
my ($parser, $givenUrl, $pageUrl, $subjectUrl);
$givenUrl = $Swignition::Misc::defaultBaseURI;
if (length $1 > 0)
{
$givenUrl = $1;
$givenUrl = "http://localhost/~tai/$1.html"
if ($givenUrl =~ /^T:(.*)$/);
}
($pageUrl, $subjectUrl) = Swignition::Misc::url_split($givenUrl);

$opts{base} = $pageUrl;
my $stdin;
while (<STDIN>)
{
s/\r?\n$//;
last if /^\.$/;
$stdin .= "$_\n";
}

$self->log(2, "Cognifying '$subjectUrl' at page '$pageUrl' from STDIN.\n");
$parser = $self->get_parser($pageUrl, \%opts, $stdin);
print Swignition::Misc::do_export($output_format, $parser, $subjectUrl);
return if ($autoclose);
}
elsif (/^COGNIFY (.+)$/i)
{
my ($parser, $givenUrl, $pageUrl, $subjectUrl);
$givenUrl = $1;
$givenUrl = "http://localhost/~tai/$1.html"
if ($givenUrl =~ /^T:(.*)$/);
($pageUrl, $subjectUrl) = Swignition::Misc::url_split($givenUrl);

$self->log(2, "Cognifying '$subjectUrl' at page '$pageUrl'.\n");
$parser = $self->get_parser($pageUrl, \%opts);
print Swignition::Misc::do_export($output_format, $parser, $subjectUrl);
return if ($autoclose);
}
elsif (/^QUIT$/i)
{
$self->log(4, "Client quit.\n");
print "Bye!\r\n";
return;
}
else
{
$self->log(0, "Huh? $_\n");
print "Use 'COGNIFY http://example.org/foo.html'\r\n";
return;
}

alarm($timeout);
}
alarm($previous_alarm);
}
}

sub get_parser
{
my $this = shift;
my $url = shift;
my $o = shift;
my $stdin = shift;
my %opts = %$o;

# STDIN - no caching yet.
if ($stdin)
{
return Swignition::GenericParser::new_by_type($stdin, \%opts);
}

my $opt_string = '#';
foreach my $k (sort keys %opts)
{
next if ($k eq 'nofollow');
$opt_string .= $k.'='.$opts{$k}.'/' if (length $opts{$k} && $k =~ /\_/);
}

$opts{ua} = Swignition::Misc::get_ua;
$opts{request} = HTTP::Request->new(GET => $url);
$opts{response} = $opts{ua}->request($opts{request});

my ($parser, $found);
if ($opts{response}->header('X-Content-Unchanged'))
{
$parser = $ObjectCache->get($url.$opt_string);
$found = defined $parser;
$this->log(3, "Perfect cache hit for $url, opts $opt_string.")
if ($found);
}

$parser = Swignition::GenericParser::new_by_type($opts{response}->content, \%opts)
unless ($parser);

$ObjectCache->set($url.$opt_string, $parser, "20 minutes") unless ($found);
return $parser;
}

Show details Hide details

Change log

r146 by m...@tobyinkster.co.uk on Jan 31, 2009   Diff
Lots of propset stuff; a few updates to
S::DM::Node.
Go to: 
Project members, sign in to write a code review

Older revisions

r133 by m...@tobyinkster.co.uk on Jan 27, 2009   Diff
XHTML @role in new data model.
r125 by m...@tobyinkster.co.uk on Jan 25, 2009   Diff
Prepare for Swignition/0.1-alpha15
release.
r75 by m...@tobyinkster.co.uk on Jan 12, 2009   Diff
Handle nofollow in daemon.
All revisions of this file

File info

Size: 6229 bytes, 233 lines

File properties

svn:executable
*
svn:keywords
Id
Hosted by Google Code