From 0eb942456239b2708a7b3b9fb628b0d0a4f89e5d Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Sat, 21 Sep 2019 15:52:51 -0700 Subject: Update United parsing --- email_assistant/assistant.py | 1 + email_assistant/plugins/united.py | 64 +++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 30 deletions(-) (limited to 'email_assistant') diff --git a/email_assistant/assistant.py b/email_assistant/assistant.py index d39b016..e6bfc66 100644 --- a/email_assistant/assistant.py +++ b/email_assistant/assistant.py @@ -198,6 +198,7 @@ class Assistant: try: for p in self.plugins: if p.match(msg): + self.log.debug("Matched %s", p.name) events = p.get_events(msg) break except Exception: diff --git a/email_assistant/plugins/united.py b/email_assistant/plugins/united.py index 38de3e2..0ed5dd8 100644 --- a/email_assistant/plugins/united.py +++ b/email_assistant/plugins/united.py @@ -31,28 +31,18 @@ import vobject from email_assistant import iata from email_assistant import plugin -def parse_dep_arr(flight_date, dep_arr): - flight_year = dateutil.parser.parse(flight_date).year - city, br, code, flight_time = dep_arr.span.children - city = city.strip() - code = code.strip()[1:-1] - code = code.split()[0] +def parse_dep_arr(flight_date, flight_time, flight_loc): + code = re.compile('^.*\((...)\)$').match(flight_loc).group(1) tz = iata.tzmap[code] - flight_time = flight_time.get_text().strip() - m = re.match(r'(.*) \((\d+[A-Z]+)\)', flight_time) - if m: - s = '%s%s %s' % (m.group(2), flight_year, m.group(1)) - flight_time = dateutil.parser.parse(s) - else: - flight_time = dateutil.parser.parse(flight_date +' '+ flight_time) + flight_time = dateutil.parser.parse(flight_date + ' ' + flight_time) flight_time = flight_time.replace(tzinfo=dateutil.tz.gettz(tz)) - return (city, code, flight_time) + return (code, flight_time) class Plugin(plugin.Plugin): name = 'united' def match(self, msg): - if ('unitedairlines@united.com' in msg['From'] and + if ('Receipts@united.com' in msg['From'] and 'Itinerary and Receipt' in msg['Subject']): return True @@ -61,32 +51,47 @@ class Plugin(plugin.Plugin): for part in msg.walk(): if part.get_content_type() == 'text/html': soup = BeautifulSoup(part.get_payload(decode=True), 'html.parser') - # confirmation_number = soup.find(class_="eTicketConfirmation").string index = 0 while True: - info = soup.find(id="ShowSegments_ShowSegment_ctl%02i_Flight" % index) + index += 1 + info = soup.find(string=re.compile('Flight %s of' % index)) if not info: break - for row in info.parents: - if row.name == 'tr': - break + while info.name != 'table': info = info.parent + + row = info.find('tr') + cols = row.find_all('td') + cols = [x.strip() for x in row.strings if x.strip()] + flight_num, flight_class = cols + + row = row.nextSibling + cols = row.find_all('td') + cols = [x.strip() for x in row.strings if x.strip()] + dep_date, arr_date = cols + + row = row.nextSibling + cols = row.find_all('td') + cols = [x.strip() for x in row.strings if x.strip()] + dep_time, arr_time = cols + + row = row.nextSibling cols = row.find_all('td') - flight_date, flight_num, flight_class, dep, arr, ac, meal = cols - flight_date = flight_date.get_text().strip() - flight_num = flight_num.get_text().strip() - flight_class = flight_class.get_text().strip() + cols = [x.strip() for x in row.strings if x.strip()] + dep_loc, arr_loc = cols - dep_city, dep_code, dep_time = parse_dep_arr(flight_date, dep) - arr_city, arr_code, arr_time = parse_dep_arr(flight_date, arr) - self.log.debug("dep: %s %s %s", dep_city, dep_code, dep_time) - self.log.debug("arr: %s %s %s", arr_city, arr_code, arr_time) + flight_num = flight_num.split()[-1] + dep_code, dep_time = parse_dep_arr(dep_date, dep_time, dep_loc) + arr_code, arr_time = parse_dep_arr(arr_date, arr_time, arr_loc) + + self.log.debug("dep: %s %s", dep_code, dep_time) + self.log.debug("arr: %s %s", arr_code, arr_time) cal = vobject.iCalendar() event = cal.add('vevent') event.add('dtstart').value = dep_time event.add('dtend').value = arr_time - summary = "Flight from %s to %s" % (dep_code, arr_code) + summary = "Flight %s from %s to %s" % (flight_num, dep_code, arr_code) event.add('summary').value = summary text = inscriptis.get_text(str(soup)) text = re.sub(r'([^ ]+)\s*\n', '\\1\n', text) @@ -95,5 +100,4 @@ class Plugin(plugin.Plugin): uid = hashlib.sha1((str(dep_time) + summary).encode('utf8')).hexdigest() event.add('uid').value = uid events.append(cal) - index += 1 return events -- cgit v1.2.3