"""Label Mar Menor tweets by sentiment and aggregate the labels per ``Datetime``.

For every year from 2010 to 2022 the script reads
``Mar Menor <year> tweets.xlsx``, detects the language of each tweet's
``Text``, scores Spanish and English tweets with the matching analyzer, and
writes ``Mar Menor <year> sentiment.xlsx`` with, per ``Datetime`` value, the
number of positive / negative / neutral tweets and the total number of tweets.

Tweets whose language is neither Spanish nor English (or cannot be detected)
get no label: all three flags stay 0, but they still count towards
``number of tweets``.
"""
import nltk
import pandas as pd
from langdetect import DetectorFactory, LangDetectException, detect
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sentiment_analysis_spanish import sentiment_analysis

# langdetect is non-deterministic on short or ambiguous texts; a fixed seed
# makes repeated runs produce the same labels.
DetectorFactory.seed = 0

# Spanish sentiment analyzer
# (Bello, H. https://pypi.org/project/sentiment-analysis-spanish/ )
sentiment = sentiment_analysis.SentimentAnalysisSpanish()

# English sentiment analyzer (VADER, Hutto & Gilbert (2014, May))
# https://predictivehacks.com/how-to-run-sentiment-analysis-in-python-using-vader/
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

# Flag columns added to the tweet table; exactly one of them is set to 1 for
# every tweet that could be labelled.
LABEL_COLUMNS = ['is_positive', 'is_negative', 'is_neutral']

# The Spanish analyzer's score is compared against these cut-offs: above the
# first is positive, below the second is negative, anything between is neutral.
SPANISH_POSITIVE_ABOVE = 0.6
SPANISH_NEGATIVE_BELOW = 0.4


def _detect_language(text):
    """Return the language code langdetect reports for *text*, or ``''``.

    Non-string cells (e.g. empty Excel cells read as NaN) and texts that
    langdetect cannot classify yield ``''`` so the tweet is left unlabelled
    instead of aborting the run.
    """
    if not isinstance(text, str):
        return ''
    try:
        return detect(text)
    except LangDetectException:
        return ''


def _classify(text):
    """Return the flag column to set for *text*, or ``None`` for no label."""
    language = _detect_language(text)

    if language == 'es':
        prob_positive = sentiment.sentiment(text)
        if prob_positive > SPANISH_POSITIVE_ABOVE:
            return 'is_positive'
        if prob_positive < SPANISH_NEGATIVE_BELOW:
            return 'is_negative'
        return 'is_neutral'

    if language == 'en':
        compound = sid.polarity_scores(text)['compound']
        if compound > 0:
            return 'is_positive'
        if compound < 0:
            return 'is_negative'
        if compound == 0:
            return 'is_neutral'

    return None


def _aggregate_by_datetime(data):
    """Return one row per ``Datetime`` with label counts and tweet count.

    The three flag columns hold sums of the 0/1 flags (i.e. counts, not
    proportions); ``number of tweets`` is the count of non-empty ``Text``
    cells. The maximum is in Twitter_data_collection.py, and it is set to
    100.000.

    NOTE(review): groups are formed on the exact ``Datetime`` values. If that
    column carries times of day rather than plain dates, the output has one
    row per timestamp rather than per calendar day - confirm against the
    input files.
    """
    aggregations = {column: 'sum' for column in LABEL_COLUMNS}
    aggregations['Text'] = 'count'
    summary = (
        data.groupby("Datetime", as_index=False)  # keep Datetime as a column
        .agg(aggregations)
        .rename(columns={'Text': 'number of tweets'})
    )
    summary['date'] = summary['Datetime'].dt.date
    return summary


for year in range(2010, 2023):
    data = pd.read_excel("Mar Menor " + str(year) + " tweets.xlsx")

    # Classify every tweet once, then expand the labels into 0/1 columns.
    labels = [_classify(text) for text in data['Text']]
    for column in LABEL_COLUMNS:
        data[column] = [int(label == column) for label in labels]

    output = "Mar Menor " + str(year) + " sentiment.xlsx"
    _aggregate_by_datetime(data).to_excel(output, index=False)
    print(year)

print("done")