quarta-feira, 25 de fevereiro de 2015

username ok

O nome já está sendo capturado;
agora falta capturar a data.

#author: Maria Beatriz Vaz

import urlparse
import urllib
import codecs
from bs4 import BeautifulSoup
import psycopg2
import sys

# Open the PostgreSQL connection and cursor used by the rest of the script.
# NOTE(review): the credentials are hard-coded in the DSN below; consider
# loading them from the environment or a configuration file instead.
try:
    conn = psycopg2.connect("dbname='tweet_ic' user='postgres' host='localhost' port='5432' password='8001007'")
    print("Connected to database")
except psycopg2.Error:
    # Nothing below can run without a connection; stop here instead of
    # failing later with a NameError on the undefined `conn`.
    print("I am unable to connect to the database")
    sys.exit(1)

cur = conn.cursor()

# Queue of search-result pages still to be scraped, and the pages fetched so
# far. `visited` is only appended to in the visible code; it is never consulted.
urls = ["https://twitter.com/search?q=%23marcocivil%20since%3A2014-02-03%20until%3A2014-05-23&src=typd"]
visited = [urls[0]]

while urls:
    url = urls[0]

    try:
        htmltext = urllib.urlopen(url).read()
    except IOError:
        # The page could not be fetched: report the URL and drop it from the
        # queue so the loop neither parses stale HTML nor spins forever.
        print(url)
        urls.pop(0)
        continue

    soup = BeautifulSoup(htmltext)
    visited.append(url)

    # Collect (name, tweet) pairs in page order. Pairing inside each
    # "content" container keeps an author attached to their own tweet even
    # when some container is missing one of the two elements.
    rows = []
    for tag in soup.findAll('div', {"class": "content"}):
        tweets = [
            tweet.get_text().encode("utf-8")
            for tweet in tag.findAll('p', {"class": "js-tweet-text tweet-text"})
        ]
        names = [
            name.get_text().encode("utf-8")
            for name in tag.findAll('strong', {"class": "fullname js-action-profile-name show-popup-with-id"})
        ]
        rows.extend(zip(names, tweets))

    print('chegou')

    # Store each pair exactly once; every insert is its own transaction so a
    # rejected row does not discard the rows that were already committed.
    for name_text, tweet_text in rows:
        print('name: ' + name_text + ' - tweet: ' + tweet_text)
        try:
            cur.execute(""" INSERT INTO teste_old (name , tweet) VALUES  ( %(name)s, %(tweet)s )""", ({"name": name_text, "tweet": tweet_text}))
            conn.commit()
        except psycopg2.Error:
            # The database rejected the row (presumably a duplicate, given
            # the 'Igual' message -- TODO confirm the table constraints).
            conn.rollback()
            print('Igual')

    print(">>>visitou: " + url)
    urls.pop(0)

if conn:
    conn.close()

Nenhum comentário:

Postar um comentário