# Quoins - A TurboGears blogging system.
# Copyright (C) 2008 James E. Blair
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Discovery and sending of TrackBack and PingBack notifications.

``LinkBackHandler`` scans HTML for outgoing links, fetches each linked
page and looks for TrackBack / PingBack endpoints advertised there.  The
endpoints are returned as ``TrackBackURI`` / ``PingBackURI`` objects whose
``send`` methods deliver the notification and return a status string.
"""

from xml.etree.ElementTree import XMLTreeBuilder
from HTMLParser import HTMLParser, HTMLParseError
import urllib
import urllib2
import re
import xmlrpclib


class HTMLLinkParser(HTMLParser):
    """Collect the anchors of an HTML document.

    After ``feed()``/``close()``, ``links`` holds one ``(href, title)``
    tuple per ``<a href=...>`` element, where ``title`` is the text found
    inside the anchor, or the href itself when the anchor has no text.
    """

    def __init__(self):
        self.links = []
        # State of the anchor currently being parsed (None outside of one).
        self.curlink_href = None
        self.curlink_title = None
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start tracking an anchor when an ``<a>`` with an href opens."""
        if tag != 'a':
            return
        for k, v in attrs:
            if k == 'href':
                self.curlink_href = v
                self.curlink_title = ''

    def handle_data(self, data):
        """Accumulate text while inside an anchor that has an href."""
        if self.curlink_href and data:
            self.curlink_title += data

    def handle_endtag(self, tag):
        """Record the finished anchor and reset the tracking state."""
        if tag != 'a':
            return
        if self.curlink_href:
            # Fall back to the URL when the anchor contained no text.
            title = self.curlink_title or self.curlink_href
            self.links.append((self.curlink_href, title))
        self.curlink_href = None
        self.curlink_title = None


class HTMLLinkBackParser(HTMLParser):
    """Collect elements that advertise a linkback endpoint.

    After ``feed()``/``close()``, ``links`` holds one ``(rel, href)`` tuple
    for every start tag (of any element type) that has a non-empty
    ``href`` and a ``rel`` of exactly ``pingback`` or ``trackback``.
    """

    def __init__(self):
        self.links = []
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Record the tag if it carries a pingback/trackback rel and href."""
        # dict() keeps the last value of a repeated attribute, which is
        # what a sequential scan over the attribute list would yield too.
        attr_map = dict(attrs)
        href = attr_map.get('href')
        rel = attr_map.get('rel')
        if href and rel in ['pingback', 'trackback']:
            self.links.append((rel, href))


class LinkBackURI(object):
    """Base class for a linkback endpoint; stores the endpoint ``uri``."""

    def __init__(self, uri):
        self.uri = uri


class TrackBackURI(LinkBackURI):
    """A TrackBack endpoint, pinged with a form-encoded HTTP POST."""

    def send(self, title='', excerpt='', url='', blog_name=''):
        """Send a TrackBack ping and return a human-readable status string.

        Never raises for ordinary failures: transport or parsing errors
        are reported as an ``'Error sending TrackBack ...'`` string, and an
        error reported by the remote server as ``'Remote error ...'``.
        """
        try:
            msg = self._send(title, excerpt, url, blog_name)
        except Exception:
            # Best effort: report the failure instead of propagating it,
            # but let KeyboardInterrupt/SystemExit through.
            return 'Error sending TrackBack to %s' % self.uri
        if msg:
            return 'Remote error %s sending TrackBack to %s' % (msg, self.uri)
        return 'Sent TrackBack to %s' % self.uri

    def _send(self, title, excerpt, url, blog_name):
        """POST the ping and parse the XML reply.

        Returns the text of the reply's ``message`` element when its
        ``error`` element is a non-zero integer, otherwise ``None``.
        Network and parse errors propagate to the caller.
        """
        data = urllib.urlencode(dict(
            title=title,
            excerpt=excerpt,
            url=url,
            blog_name=blog_name,
        ))
        req = urllib2.Request(self.uri, data)
        response = urllib2.urlopen(req)
        try:
            res = response.read()
        finally:
            response.close()

        builder = XMLTreeBuilder()
        # Strip surrounding whitespace before handing the reply to the
        # XML parser.
        builder.feed(res.strip())
        tree = builder.close()

        error = int(tree.find('error').text)
        if error:
            return tree.find('message').text
        return None


class PingBackURI(LinkBackURI):
    """A PingBack endpoint, pinged through XML-RPC ``pingback.ping``."""

    def send(self, source_url='', target_url=''):
        """Send a PingBack and return a human-readable status string.

        Mirrors ``TrackBackURI.send``: failures are reported in the
        returned string rather than raised.
        """
        try:
            msg = self._send(source_url, target_url)
        except Exception:
            return 'Error sending PingBack to %s' % self.uri
        if msg:
            return 'Remote error %s sending PingBack to %s' % (msg, self.uri)
        return 'Sent PingBack to %s' % self.uri

    def _send(self, source_url, target_url):
        """Call ``pingback.ping`` on the endpoint.

        Returns ``None`` on success, or the ``xmlrpclib.Error`` instance
        raised by the call.  Other exceptions propagate to the caller.
        """
        server = xmlrpclib.ServerProxy(self.uri)
        try:
            server.pingback.ping(source_url, target_url)
        except xmlrpclib.Error as v:
            return v
        return None


class LinkBackHandler(object):
    """Find the linkback endpoints of the pages an HTML text links to."""

    # TrackBack endpoint as it appears in the RDF block embedded in a page.
    TB_RE = re.compile(r'trackback:ping="([^"]+)"')
    # PingBack <link> element, using the autodiscovery expression given in
    # the Pingback 1.0 specification.  NOTE(review): the pattern had been
    # reduced to an empty string (which matches everywhere and produced
    # endpoints with an empty URL); confirm against the upstream source.
    PB_RE = re.compile(r'<link rel="pingback" href="([^"]+)" ?/?>')

    def __init__(self, trackbacks=True, pingbacks=True):
        self.support_trackbacks = trackbacks
        self.support_pingbacks = pingbacks

    def findURIs(self, text):
        """Return ``(uri, title, linkbacks)`` for each anchor in *text*.

        ``linkbacks`` is the list returned by ``findLinkBackURIs`` for
        that anchor's target.  Anchors whose target cannot be fetched or
        parsed are silently left out of the result.
        """
        p = HTMLLinkParser()
        p.feed(text)
        p.close()
        ret = []
        for uri, title in p.links:
            try:
                lbs = self.findLinkBackURIs(uri)
            except (ValueError, HTMLParseError, urllib2.URLError):
                # URLError is the base class of HTTPError; catching it
                # also covers targets that cannot be opened at all (e.g.
                # unsupported schemes or unreachable hosts), so one bad
                # link does not abort the scan of the remaining ones.
                continue
            ret.append((uri, title, lbs))
        return ret

    def findLinkBackURIs(self, uri):
        """Fetch *uri* and return the linkback endpoints it advertises.

        Returns a list of ``TrackBackURI`` / ``PingBackURI`` objects,
        restricted to the kinds enabled on this handler.  For PingBack an
        ``X-Pingback`` response header takes precedence over endpoints
        found in the page body.  Errors raised while fetching or parsing
        the page propagate to the caller.
        """
        response = urllib2.urlopen(urllib2.Request(uri))
        try:
            info = response.info()
            res = response.read()
        finally:
            response.close()

        p = HTMLLinkBackParser()
        p.feed(res)
        p.close()

        found = set()
        ret = []

        def add(kind, url):
            # Report every endpoint URL at most once.
            if url not in found:
                found.add(url)
                ret.append(kind(url))

        if self.support_trackbacks:
            for url in self.TB_RE.findall(res):
                add(TrackBackURI, url)
            for rel, url in p.links:
                if rel == 'trackback':
                    add(TrackBackURI, url)

        if self.support_pingbacks:
            pb_header = info.get('X-Pingback', None)
            if pb_header:
                ret.append(PingBackURI(pb_header))
            else:
                for url in self.PB_RE.findall(res):
                    add(PingBackURI, url)
                for rel, url in p.links:
                    if rel == 'pingback':
                        add(PingBackURI, url)
        return ret