<center id="pz5tg"></center>
  1. <object id="pz5tg"></object>

    <code id="pz5tg"><small id="pz5tg"></small></code>

    #!c:/python25/python.exe
    import datetime
    ##import re
    import sqlite3
    import urllib
    import urlparse
    
    from BeautifulSoup import BeautifulSoup
    
    #
    # DATE_MATCH regexp could be used in place of string split
    #
    ## DATE_MATCH = re.compile (r"\S+\s(\d\d?)(?:st|nd|rd|th)\s(\S+)\s(\d{4})")
    
    
    URL = "http://dev.goodtoread.org/whatsnew"
    page = BeautifulSoup (urllib.urlopen (URL))
    
    book_reviews = []
    
    for whatsnew_date in page.findAll ("div", "whatsnew-date"):
      weekday, day, month, year = whatsnew_date.h2.string.split ()
      day = day[:-2]
      date = datetime.datetime.strptime ("%s %s %s" % (day, month, year), "%d %B %Y").date ()
      print date
    
      for book_summary in whatsnew_date.findAll ("div", "book-summary"):
        title = book_summary.find ("span", "book-title") or book_summary.find ("span", "book-title-quick")
        title = title.string
        print "\t", title
    
        synopsis = book_summary.find ("p", "synopsis").string
        summary = book_summary.find ("p", "summary")
        if summary:
          summary = summary.string
    
        image = book_summary.find ("img", "thumblet")
        if image:
          image_url = urlparse.urljoin (URL, image['src'])
          image_data = urllib.urlopen (image_url).read ()
        else:
          image_data = ""
        book_reviews.append ((title, synopsis, summary, buffer (image_data), date))
    
    db = sqlite3.connect ("books.db")
    db.execute ("DELETE FROM whatsnew")
    db.executemany (
      "INSERT INTO whatsnew (title, synopsis, summary, image, date_added) VALUES (?, ?, ?, ?, ?)",
      book_reviews
    )
    db.commit ()
    
    11