summaryrefslogtreecommitdiff
path: root/quoins/linkback.py
diff options
context:
space:
mode:
Diffstat (limited to 'quoins/linkback.py')
-rw-r--r--quoins/linkback.py184
1 files changed, 184 insertions, 0 deletions
diff --git a/quoins/linkback.py b/quoins/linkback.py
new file mode 100644
index 0000000..404789c
--- /dev/null
+++ b/quoins/linkback.py
@@ -0,0 +1,184 @@
1# Quoins - A TurboGears blogging system.
2# Copyright (C) 2008 James E. Blair <corvus@gnu.org>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17from xml.etree.ElementTree import XMLTreeBuilder
18from HTMLParser import HTMLParser, HTMLParseError
19import urllib, urllib2
20import re
21import xmlrpclib
22
class HTMLLinkParser(HTMLParser):
    """Collect the hyperlinks found in an HTML fragment.

    After feeding text to the parser, ``links`` holds one ``(href, title)``
    tuple per ``<a href="...">`` element, in document order.  ``title`` is
    the text found inside the anchor, or the href itself when the anchor
    contains no text.
    """

    def __init__(self):
        self.links = []
        # State of the anchor currently being read (None when outside one).
        self.curlink_href = None
        self.curlink_title = None
        HTMLParser.__init__(self)

    def _finish_link(self):
        """Record the anchor being read, if any, and reset the state."""
        if self.curlink_href:
            title = self.curlink_title or self.curlink_href
            self.links.append((self.curlink_href, title))
        self.curlink_href = None
        self.curlink_title = None

    def handle_starttag(self, tag, attrs):
        if tag != 'a':
            return
        # Anchors cannot nest: a new <a> implicitly ends an unclosed one,
        # so record the pending link instead of silently overwriting it.
        self._finish_link()
        for name, value in attrs:
            if name == 'href':
                self.curlink_href = value
                self.curlink_title = ''

    def handle_data(self, data):
        # Only text inside an anchor that has an href contributes to a title.
        if self.curlink_href and data:
            self.curlink_title += data

    def handle_endtag(self, tag):
        if tag != 'a':
            return
        self._finish_link()

    def close(self):
        """Finish parsing and record an anchor left open at end of input."""
        HTMLParser.close(self)
        self._finish_link()
50
class HTMLLinkBackParser(HTMLParser):
    """Collect PingBack/TrackBack endpoints advertised by an HTML page.

    After feeding a page to the parser, ``links`` holds one ``(rel, href)``
    tuple for every tag (of any name) that carries an ``href`` and whose
    ``rel`` attribute names ``pingback`` or ``trackback``.  The ``rel``
    element of each tuple is always the lower-case token.
    """

    LINKBACK_RELS = ('pingback', 'trackback')

    def __init__(self):
        self.links = []
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        href = rel = None
        for name, value in attrs:
            if name == 'href':
                href = value
            if name == 'rel':
                rel = value
        if not href or not rel:
            return
        # rel is a whitespace-separated list of case-insensitive tokens, so
        # match each token rather than comparing the raw attribute value.
        seen = []
        for token in rel.lower().split():
            if token in self.LINKBACK_RELS and token not in seen:
                seen.append(token)
                self.links.append((token, href))
65
class LinkBackURI(object):
    """Base class for a linkback endpoint; it only remembers the URI."""

    def __init__(self, uri):
        # Address of the remote endpoint, stored exactly as given.
        self.uri = uri
69
class TrackBackURI(LinkBackURI):
    """A TrackBack ping URL that can be notified about a post."""

    def send(self, title='', excerpt='', url='', blog_name=''):
        """Send a TrackBack ping and return a human-readable status string.

        Never raises for ordinary failures: any error while sending or
        while reading the reply is reported in the returned string.
        """
        try:
            msg = self._send(title, excerpt, url, blog_name)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            return 'Error sending TrackBack to %s' % self.uri
        if msg:
            return 'Remote error %s sending TrackBack to %s'%(msg, self.uri)
        return 'Sent TrackBack to %s' % self.uri

    @staticmethod
    def _utf8(value):
        """Return unicode text encoded as UTF-8; pass other values through."""
        if isinstance(value, type(u'')):
            return value.encode('utf-8')
        return value

    def _send(self, title, excerpt, url, blog_name):
        """POST the ping to ``self.uri`` and interpret the XML reply.

        Returns None when the reply's ``<error>`` element is 0, otherwise
        the text of its ``<message>`` element (or a placeholder when that
        is missing or empty).  Raises ValueError when the reply has no
        usable ``<error>`` element; network and XML parsing errors
        propagate to the caller.
        """
        fields = dict(
            title=title,
            excerpt=excerpt,
            url=url,
            blog_name=blog_name,
            )
        # Python 2's urlencode cannot serialise non-ASCII unicode values,
        # so encode text fields to UTF-8 bytes first.
        data = urllib.urlencode(
            dict((key, self._utf8(value)) for key, value in fields.items()))

        response = urllib2.urlopen(urllib2.Request(self.uri, data))
        try:
            res = response.read()
        finally:
            response.close()

        builder = XMLTreeBuilder()
        builder.feed(res.strip())
        tree = builder.close()

        error = tree.find('error')
        if error is None or error.text is None:
            raise ValueError('TrackBack response has no <error> element')
        if not int(error.text):
            return None

        message = tree.find('message')
        text = None
        if message is not None:
            text = message.text
        # A failure must never look like success to send(), even when the
        # remote side gave no explanation.
        return text or 'unknown error'
101
class PingBackURI(LinkBackURI):
    """A PingBack XML-RPC endpoint that can be notified about a link."""

    def send(self, source_url='', target_url=''):
        """Send a PingBack and return a human-readable status string.

        Like TrackBackURI.send, failures are reported in the returned
        string rather than raised.  (A leftover ``raise`` used to make the
        error message unreachable and let transport errors escape.)
        """
        try:
            msg = self._send(source_url, target_url)
        except Exception:
            return 'Error sending PingBack to %s' % self.uri
        if msg:
            return 'Remote error %s sending PingBack to %s'%(msg, self.uri)
        return 'Sent PingBack to %s' % self.uri

    def _send(self, source_url, target_url):
        """Call ``pingback.ping`` on the XML-RPC server at ``self.uri``.

        Returns None on success, or the xmlrpclib.Error instance when the
        call fails with an XML-RPC level error.  Other exceptions (for
        example an unsupported URI scheme or a socket error) propagate to
        the caller.
        """
        server = xmlrpclib.ServerProxy(self.uri)
        try:
            server.pingback.ping(source_url, target_url)
        except xmlrpclib.Error as v:
            return v
        return None
123
class LinkBackHandler(object):
    """Discover the TrackBack and PingBack endpoints of linked pages."""

    def __init__(self, trackbacks=True, pingbacks=True):
        self.support_trackbacks = trackbacks
        self.support_pingbacks = pingbacks

    def findURIs(self, text):
        """Find linkback endpoints for every link in an HTML fragment.

        Returns a list of ``(uri, title, linkbacks)`` tuples, one per link
        in ``text`` whose target could be fetched and parsed; ``linkbacks``
        is the list returned by findLinkBackURIs for that target.  Links
        that cannot be fetched or parsed are skipped.
        """
        p = HTMLLinkParser()
        p.feed(text)
        p.close()
        ret = []
        for uri, title in p.links:
            try:
                lbs = self.findLinkBackURIs(uri)
            except (ValueError, HTMLParseError, urllib2.URLError):
                # URLError (the parent of HTTPError) also covers
                # unreachable hosts and unsupported schemes such as
                # mailto:, so one bad link does not abort the whole scan.
                continue
            ret.append((uri, title, lbs))
        return ret

    TB_RE = re.compile(r'trackback:ping="([^"]+)"')
    PB_RE = re.compile(r'<link rel="pingback" href="([^"]+)" ?/?>')

    def findLinkBackURIs(self, uri):
        """Fetch ``uri`` and return the linkback endpoints it advertises.

        Returns a list of TrackBackURI and PingBackURI objects, limited to
        the kinds enabled on this handler.  An ``X-Pingback`` response
        header, when present, is used instead of scanning the page body
        for PingBack links.
        """
        response = urllib2.urlopen(urllib2.Request(uri))
        try:
            info = response.info()
            res = response.read()
        finally:
            response.close()

        p = HTMLLinkBackParser()
        p.feed(res)
        p.close()

        found = set()
        ret = []

        def add(kind, url):
            # Record each endpoint URL once, whichever way it was found.
            if url not in found:
                found.add(url)
                ret.append(kind(url))

        if self.support_trackbacks:
            for url in self.TB_RE.findall(res):
                add(TrackBackURI, url)
            for rel, url in p.links:
                if rel == 'trackback':
                    add(TrackBackURI, url)
        if self.support_pingbacks:
            pb_header = info.get('X-Pingback', None)
            if pb_header:
                ret.append(PingBackURI(pb_header))
            else:
                for url in self.PB_RE.findall(res):
                    add(PingBackURI, url)
                for rel, url in p.links:
                    if rel == 'pingback':
                        add(PingBackURI, url)
        return ret