# Status: the author name is already being extracted;
# the tweet date is still missing.
#author: Maria Beatriz Vaz
import urlparse
import urllib
import codecs
from bs4 import BeautifulSoup
import psycopg2
import sys
# Open the PostgreSQL connection and cursor used by the rest of the script.
# NOTE(review): the credentials are hard-coded in the DSN below; consider
# moving them to configuration or the environment.
try:
    conn = psycopg2.connect("dbname='tweet_ic' user='postgres' host='localhost' port='5432' password='8001007'")
except psycopg2.Error:
    # Nothing below can run without a connection. Previously execution fell
    # through to conn.cursor() and died with a NameError, so stop here with
    # a non-zero exit status instead.
    print("I am unable to connect to the database")
    sys.exit(1)
else:
    print("Connected to database")
cur = conn.cursor()
# Fetch each queued search page, extract (author name, tweet text) pairs and
# insert them into the `teste_old` table, then close the connection.
#
# `urls` is the queue of pages still to fetch; `visited` records the pages
# that were fetched (the seed URL is listed up front).
urls = ["https://twitter.com/search?q=%23marcocivil%20since%3A2014-02-03%20until%3A2014-05-23&src=typd"]
visited = [urls[0]]
try:
    while urls:
        url = urls.pop(0)
        try:
            htmltext = urllib.urlopen(url).read()
        except IOError:
            # The page could not be fetched, so there is nothing to parse.
            # Report the URL and move on rather than parsing an undefined
            # (or stale) `htmltext`.
            print(url)
            continue
        soup = BeautifulSoup(htmltext)
        #print soup.get_text().encode("utf-8")
        visited.append(url)

        # Pair names and tweets inside each "content" div so that a div
        # lacking one of the two cannot shift the pairing of later rows.
        rows = []
        for tag in soup.findAll('div', { "class" : "content" }):
            tweet_texts = [
                tweet.get_text().encode("utf-8")
                for tweet in tag.findAll('p', {"class" : "js-tweet-text tweet-text"})
            ]
            name_texts = [
                name.get_text().encode("utf-8")
                for name in tag.findAll('strong', {"class" : "fullname js-action-profile-name show-popup-with-id"})
            ]
            rows.extend(zip(name_texts, tweet_texts))
        print('chegou')

        # Insert in page order. Iterating the rows directly visits each one
        # exactly once; the former countdown loop ran one step too far and
        # wrapped around to index -1, re-inserting the first row.
        for name_text, tweet_text in rows:
            print('name: ' + name_text + ' - tweet: ' + tweet_text)
            try:
                cur.execute(""" INSERT INTO teste_old (name , tweet) VALUES ( %(name)s, %(tweet)s )""", ({ "name" : name_text , "tweet" : tweet_text}))
                conn.commit()
            except psycopg2.Error:
                # Best effort: undo the failed statement and keep going.
                # NOTE(review): 'Igual' ("same") suggests a duplicate row is
                # the expected failure here -- confirm against the schema.
                conn.rollback()
                print('Igual')
        print(">>>visitou: " + url)
finally:
    # Release the connection even if the crawl aborts with an error.
    if conn:
        conn.close()
# (Blog page footer, not part of the script) No comments:
# (Blog page footer, not part of the script) Post a comment