# Twitter-Mar-Menor / mar_menor.py
import calendar
import datetime

import pandas as pd
import snscrape.modules.twitter as sntwitter

# snscrape requires a Python 3.8 interpreter to run properly.

#https://github.com/MartinBeckUT/TwitterScraper/tree/master/snscrape

# Scrape tweets matching "Mar Menor" one day at a time and export one Excel
# workbook per year, named "Mar Menor <year> tweets.xlsx".

# Configuration used by the loops below.
maxTweets = 5000      # Maximum number of tweets stored per day.
start_day = 1         # First day of each month to scrape.
end_day = 31          # Last day of each month to scrape; clamped to the month's real length.
start_month = 1
end_month = 12
y_start = 2010
y_end = 2022

# Rows collected for the year currently being scraped; emptied after each
# yearly export so every workbook only holds its own year.
tweets_list2 = []
for y in range(y_start, y_end + 1):
    for m in range(start_month, end_month + 1):
        # Clamp to the real month length so non-existent dates such as
        # 30 February are never queried.
        last_day = min(end_day, calendar.monthrange(y, m)[1])
        for d in range(start_day, last_day + 1):
            since = datetime.date(y, m, d)
            # Let date arithmetic handle month and year roll-over; formatting
            # d + 1 directly produced invalid bounds like "until:2010-01-32".
            until = since + datetime.timedelta(days=1)
            window = 'since:{} until:{}'.format(since.isoformat(), until.isoformat())

            # Using TwitterSearchScraper to scrape data and append tweets to list
            scraper = sntwitter.TwitterSearchScraper('Mar Menor ' + window)
            for i, tweet in enumerate(scraper.get_items()):
                # i is zero-based, so stop as soon as maxTweets tweets are stored.
                if i >= maxTweets:
                    break
                tweets_list2.append([tweet.date, tweet.id, tweet.content, tweet.user.username, d])
            # Progress trace: the one-day window that has just been processed.
            print(window)

    # Creating a dataframe from the tweets collected for this year
    tweets_df2 = pd.DataFrame(tweets_list2, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Day'])
    # Reduce each timestamp to its calendar date before exporting.
    tweets_df2['Datetime'] = tweets_df2['Datetime'].apply(lambda a: pd.to_datetime(a).date())

    # Display first 5 entries from dataframe
    print(tweets_df2.head())

    name_output = "Mar Menor " + str(y) + " tweets.xlsx"

    tweets_df2.to_excel(name_output, index=False, engine='xlsxwriter')
    tweets_list2 = []