diff options
Diffstat (limited to 'quoins/linkback.py')
-rw-r--r-- | quoins/linkback.py | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/quoins/linkback.py b/quoins/linkback.py new file mode 100644 index 0000000..404789c --- /dev/null +++ b/quoins/linkback.py | |||
@@ -0,0 +1,184 @@ | |||
1 | # Quoins - A TurboGears blogging system. | ||
2 | # Copyright (C) 2008 James E. Blair <corvus@gnu.org> | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU General Public License as published by | ||
6 | # the Free Software Foundation, either version 3 of the License, or | ||
7 | # (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU General Public License | ||
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | |||
17 | from xml.etree.ElementTree import XMLTreeBuilder | ||
18 | from HTMLParser import HTMLParser, HTMLParseError | ||
19 | import urllib, urllib2 | ||
20 | import re | ||
21 | import xmlrpclib | ||
22 | |||
class HTMLLinkParser(HTMLParser):
    """Collect ``(href, title)`` pairs for the ``<a href=...>`` tags fed in.

    The title is the text seen between the opening and the closing tag;
    an anchor without any text uses its href as the title.  Results are
    appended to ``self.links`` as closing tags are encountered.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.links = []
        # State of the anchor currently being read; None outside an anchor.
        self.curlink_href = None
        self.curlink_title = None

    def handle_starttag(self, tag, attrs):
        """Start tracking a link when an ``<a>`` tag carries an href."""
        if tag != 'a':
            return
        hrefs = [value for name, value in attrs if name == 'href']
        if hrefs:
            # When href is repeated, the last occurrence wins.
            self.curlink_href = hrefs[-1]
            self.curlink_title = ''

    def handle_data(self, data):
        """Append text to the title of the link being tracked, if any."""
        if not self.curlink_href or not data:
            return
        self.curlink_title += data

    def handle_endtag(self, tag):
        """Record the tracked link on ``</a>`` and clear the state."""
        if tag != 'a':
            return
        href = self.curlink_href
        if href:
            # Fall back to the href itself when the anchor had no text.
            self.links.append((href, self.curlink_title or href))
        self.curlink_href = self.curlink_title = None
50 | |||
class HTMLLinkBackParser(HTMLParser):
    """Collect linkback autodiscovery links from an HTML document.

    Every start tag that carries an ``href`` together with a ``rel``
    naming ``pingback`` or ``trackback`` is recorded in ``self.links`` as
    a ``(rel, href)`` tuple, where ``rel`` is the matched keyword in
    lower case.
    """

    # rel keywords that identify a linkback endpoint.
    LINKBACK_RELS = ('pingback', 'trackback')

    def __init__(self):
        self.links = []
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Record ``(rel, href)`` for tags advertising a linkback endpoint."""
        href = rel = None
        for k, v in attrs:
            if k == 'href':
                href = v
            elif k == 'rel':
                rel = v
        # Valueless attributes arrive as None; nothing to record then.
        if not href or not rel:
            return
        # rel is a space-separated list of case-insensitive keywords, so
        # match each token instead of comparing the raw attribute value.
        matched = []
        for token in rel.lower().split():
            if token in self.LINKBACK_RELS and token not in matched:
                matched.append(token)
                self.links.append((token, href))
65 | |||
class LinkBackURI(object):
    """Base class holding the URI of a linkback endpoint."""

    def __init__(self, uri):
        # Stored as given; no validation or normalisation is performed.
        self.uri = uri
69 | |||
class TrackBackURI(LinkBackURI):
    """A TrackBack endpoint that can be pinged with ``send``."""

    def send(self, title='', excerpt='', url='', blog_name=''):
        """Send a TrackBack ping and return a human-readable status string.

        Never raises for ordinary failures: any error while sending or
        while parsing the reply is reported in the returned string.
        """
        try:
            msg = self._send(title, excerpt, url, blog_name)
        except Exception:
            # Best effort, but let KeyboardInterrupt/SystemExit propagate
            # (a bare ``except:`` would swallow those as well).
            return 'Error sending TrackBack to %s' % self.uri
        if msg:
            return 'Remote error %s sending TrackBack to %s'%(msg, self.uri)
        return 'Sent TrackBack to %s' % self.uri

    def _send(self, title, excerpt, url, blog_name):
        """POST the ping to ``self.uri`` and parse the XML reply.

        Returns None when the reply's ``<error>`` is 0, otherwise a
        non-empty description of the remote error.  Raises on transport
        failures and on replies that cannot be parsed.
        """
        data = urllib.urlencode(dict(
            title=title,
            excerpt=excerpt,
            url=url,
            blog_name=blog_name,
            ))

        response = urllib2.urlopen(urllib2.Request(self.uri, data))
        try:
            res = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        builder = XMLTreeBuilder()
        builder.feed(res.strip())
        tree = builder.close()

        error = tree.find('error')
        if error is None:
            raise ValueError('TrackBack response has no <error> element')
        if not int(error.text):
            return None

        message = tree.find('message')
        if message is not None and message.text:
            return message.text
        # The remote end flagged an error without describing it.  Return
        # something truthy so send() does not report the ping as sent.
        return 'code %s' % error.text.strip()
101 | |||
class PingBackURI(LinkBackURI):
    """A PingBack XML-RPC endpoint that can be pinged with ``send``."""

    def send(self, source_url='', target_url=''):
        """Send a PingBack and return a human-readable status string.

        Mirrors ``TrackBackURI.send``: failures are reported in the
        returned string instead of being raised.
        """
        try:
            msg = self._send(source_url, target_url)
        except Exception:
            # A leftover ``raise`` used to make this return unreachable;
            # report the failure as the message below always intended.
            return 'Error sending PingBack to %s' % self.uri
        if msg:
            return 'Remote error %s sending PingBack to %s'%(msg, self.uri)
        return 'Sent PingBack to %s' % self.uri

    def _send(self, source_url, target_url):
        """Call ``pingback.ping`` on the XML-RPC server at ``self.uri``.

        Returns None on success, or the ``xmlrpclib.Error`` instance
        raised by the call.  Other exceptions propagate to the caller.
        """
        server = xmlrpclib.ServerProxy(self.uri)
        try:
            server.pingback.ping(source_url, target_url)
        except xmlrpclib.Error as v:
            return v
        return None
123 | |||
class LinkBackHandler(object):
    """Discover the TrackBack and PingBack endpoints of linked pages."""

    # Matches a trackback:ping="..." attribute anywhere in the page.
    TB_RE = re.compile(r'trackback:ping="([^"]+)"')
    # Matches a <link rel="pingback" href="..."> element in this exact form.
    PB_RE = re.compile(r'<link rel="pingback" href="([^"]+)" ?/?>')

    def __init__(self, trackbacks=True, pingbacks=True):
        """Choose which kinds of linkback endpoints to look for."""
        self.support_trackbacks = trackbacks
        self.support_pingbacks = pingbacks

    def findURIs(self, text):
        """Return ``(uri, title, linkbacks)`` for each link in *text*.

        *text* is HTML; every ``<a href>`` in it is fetched and scanned
        with ``findLinkBackURIs``.  Links that cannot be fetched or
        parsed are skipped rather than aborting the scan.
        """
        p = HTMLLinkParser()
        p.feed(text)
        p.close()
        ret = []
        for uri, title in p.links:
            try:
                lbs = self.findLinkBackURIs(uri)
            except (ValueError, HTMLParseError, IOError):
                # ValueError: urllib2 rejects e.g. relative URLs.
                # IOError: covers urllib2.URLError/HTTPError and socket
                # errors, so one unreachable host does not stop the rest.
                continue
            ret.append((uri, title, lbs))
        return ret

    def findLinkBackURIs(self, uri):
        """Fetch *uri* and return the linkback endpoints it advertises.

        Returns a list of ``TrackBackURI`` / ``PingBackURI`` objects,
        limited to the kinds enabled on this handler, without duplicates.
        Raises whatever ``urllib2`` or the HTML parser raise on failure.
        """
        response = urllib2.urlopen(urllib2.Request(uri))
        try:
            info = response.info()
            res = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        p = HTMLLinkBackParser()
        p.feed(res)
        p.close()

        ret = []
        found = set()
        if self.support_trackbacks:
            urls = self.TB_RE.findall(res)
            urls.extend(url for rel, url in p.links if rel == 'trackback')
            self._collect(ret, found, TrackBackURI, urls)
        if self.support_pingbacks:
            pb_header = info.get('X-Pingback', None)
            if pb_header:
                # When the header is present the page body is not consulted.
                urls = [pb_header]
            else:
                urls = self.PB_RE.findall(res)
                urls.extend(url for rel, url in p.links if rel == 'pingback')
            self._collect(ret, found, PingBackURI, urls)
        return ret

    def _collect(self, ret, found, cls, urls):
        """Append ``cls(url)`` to *ret* for each url not yet seen for *cls*."""
        for url in urls:
            # Key on the kind too, so a URL advertised as both TrackBack
            # and PingBack yields one entry of each kind.
            key = (cls, url)
            if key not in found:
                found.add(key)
                ret.append(cls(url))