#!c:/python25/python.exe
import datetime
##import re
import sqlite3
import urllib
import urlparse

from BeautifulSoup import BeautifulSoup

#
# DATE_MATCH regexp could be used in place of string split
#
## DATE_MATCH = re.compile (r"\S+\s(\d\d?)(?:st|nd|rd|th)\s(\S+)\s(\d{4})")


URL = "http://dev.goodtoread.org/whatsnew"
page = BeautifulSoup (urllib.urlopen (URL))

book_reviews = []

for whatsnew_date in page.findAll ("div", "whatsnew-date"):
  weekday, day, month, year = whatsnew_date.h2.string.split ()
  day = day[:-2]
  date = datetime.datetime.strptime ("%s %s %s" % (day, month, year), "%d %B %Y").date ()
  print date

  for book_summary in whatsnew_date.findAll ("div", "book-summary"):
    title = book_summary.find ("span", "book-title") or book_summary.find ("span", "book-title-quick")
    title = title.string
    print "\t", title

    synopsis = book_summary.find ("p", "synopsis").string
    summary = book_summary.find ("p", "summary")
    if summary:
      summary = summary.string

    image = book_summary.find ("img", "thumblet")
    if image:
      image_url = urlparse.urljoin (URL, image['src'])
      image_data = urllib.urlopen (image_url).read ()
    else:
      image_data = ""
    book_reviews.append ((title, synopsis, summary, buffer (image_data), date))

db = sqlite3.connect ("books.db")
db.execute ("DELETE FROM whatsnew")
db.executemany (
  "INSERT INTO whatsnew (title, synopsis, summary, image, date_added) VALUES (?, ?, ?, ?, ?)",
  book_reviews
)
db.commit ()
11