I've had a love/hate relationship with urlview for a while (see UrlViewAnnoyingReAsking), and I just got fed up with its failure to handle base64 and unusual characters in URLs. Debian has a package called urlscan, and fixing urlview would be good, but I figured this might be a good way to practice some Python.
The annoying thing about curses in Python (and other places?) is the unreliability of KEY_ENTER, so I had to also watch for keycode 10. If you have a non-US-standard keyboard, the key scan codes might be different.
Update 20070503:
I wasn't happy with the keycodes being mixed in with the code, so I moved them to a dispatch table. I also removed some lame reduplication of code.
Update 20070520:
I added support for autodetection and autodecoding of base64 and html entities.
Update 20071116:
Google hits this enough that I put this code on software.haller.ws.
# XXX after 2.5, condense try...except...finally and cleanup()
import sys, re, email, email.Iterators, email.Parser, curses, os, traceback, base64, xml.sax.saxutils
# Matches http/https/ftp/ftps URLs; the character class stops the match at
# quotes, angle brackets, or a space (common delimiters in mail bodies).
re_url = re.compile( r'((?:f|ht)tps?://[^\'\"\<\> ]+)', re.I)
# Heuristic base64 detector: a run of 60+ characters from the base64
# alphabet (alnum, /, +, =) is assumed to be base64-encoded content.
re_base64 = re.compile( r'[0-9a-zA-Z\/\+\=]{60,}' )
# Dispatch table: tuple-of-keycodes -> Python statement exec'd at module
# scope by the input loop below.  The statements reference the module
# globals `urls` and `selected`, so they must run via exec at top level.
# curses.KEY_ENTER is unreliable, hence the extra binding of keycode 10
# (linefeed) for the "browse" action -- see the note at the top of the file.
controls = ({
( ord("Q"), ord("q") ) : 'cleanup()',
( ord("B"), ord("b"), 10, curses.KEY_ENTER ) : 'browse(urls[selected])',
(curses.KEY_UP, curses.KEY_LEFT) : 'selected = (selected - 1) % len(urls)',
(curses.KEY_DOWN, curses.KEY_RIGHT) : 'selected = (selected + 1) % len(urls)',
})
def cleanup(kill=True):
    """Restore the terminal from curses mode and optionally exit.

    kill -- when truthy (the default), terminate the process via
            sys.exit() after restoring the terminal.
    """
    try:
        curses.nocbreak(); stdscr.keypad(0); curses.echo(); curses.endwin()
    except (curses.error, NameError):
        # If curses.initscr() itself failed, `stdscr` was never bound and
        # the teardown calls raise; the original code then died with a
        # NameError inside the error handler.  Best-effort teardown only.
        pass
    if kill: sys.exit()
def browse(url):
    """Open *url* in a new Firefox tab, then tear down curses and exit.

    The URL is passed as its own argv element with no shell involved, so
    quotes and shell metacharacters in the URL can no longer break the
    command line or inject commands -- the previous
    os.system("firefox -new-tab '%s'") form broke on any URL containing
    a single quote.  Firefox's output is discarded, as before.
    """
    import subprocess  # local import keeps the module's import line unchanged
    devnull = open(os.devnull, 'w')
    try:
        subprocess.call(["firefox", "-new-tab", url],
                        stdout=devnull, stderr=devnull)
    finally:
        devnull.close()
    cleanup()
# Parse the mail message from stdin (typically piped in by the MUA).
# get msg and re-open stdin if needed
msg = email.Parser.Parser().parse(sys.stdin)
# Stdin was a pipe, but curses needs a real terminal to read keystrokes
# from, so re-point file descriptor 0 at the controlling tty.
if not os.isatty(0):
    fd = os.open('/dev/tty', os.O_RDONLY)
    # NOTE(review): os.open raises OSError on failure rather than returning
    # a negative fd, so this check is effectively dead code.
    if fd < 0: raise ValueError("Unable to open /dev/tty. Exhausted file descriptors?")
    os.dup2(fd, 0); os.close(fd)
# Walk the decoded body lines of the message, collecting every URL.
# The iterator (with decode=1) handles transfer-encoding for us; lines
# that look like raw base64 (see re_base64) get an extra decode pass
# before scanning.  Immediately repeated URLs are collapsed to one entry.
urls = []
for line in email.Iterators.body_line_iterator(msg, 1):
    if re_base64.search(line) is not None:
        line = base64.b64decode(line)
    for match in re_url.finditer(line):
        candidate = match.group(1)
        if not urls or candidate != urls[-1]:
            urls.append(candidate)
# Undo any HTML entity encoding (&amp; etc.) in the collected URLs.
urls = [xml.sax.saxutils.unescape(u) for u in urls]
# Display the URL menu in curses and run the key-dispatch loop.
try:
    stdscr = curses.initscr()
    stdscr.clear(); curses.noecho(); curses.cbreak(); stdscr.keypad(1)
    curses.curs_set(0)  # hide the cursor; selection is shown via reverse video
    selected = 0
    while True:
        # Redraw the whole menu; the selected row is highlighted.
        for i, u in enumerate(urls):
            color = curses.A_NORMAL
            if i == selected: color = curses.A_REVERSE
            stdscr.addstr(i, 0, "%-2d %s" % (i+1, u), color)
        if len(urls) == 0:
            stdscr.addstr(selected, 0, "No URLs found!")
        stdscr.refresh()
        c = stdscr.getch()
        for keys, action in controls.items():
            if c in keys:
                # With no URLs, the navigation actions divide by len(urls)
                # (ZeroDivisionError) and browse indexes urls[selected]
                # (IndexError); skip any action that touches `urls` then,
                # so only quitting remains possible.
                if urls or 'urls' not in action:
                    exec(action)
except Exception:
    # Deliberately `except Exception`, NOT a bare `except:`: a normal quit
    # raises SystemExit from cleanup()'s sys.exit(), and the old bare
    # except caught it, ran the teardown a second time, and printed a
    # spurious traceback on every exit.
    cleanup(0)
    traceback.print_exc()
# clean up curses
cleanup()