# NOTE(review): this part of the file had lost its line breaks and
# indentation. The block structure below was reconstructed from the statement
# order; places where the original nesting could not be determined with
# certainty are marked NOTE(review).
import ast
import asyncio
import concurrent.futures
import csv
import datetime
import math
import os.path
import re
import time
import traceback
from collections import Counter
from collections import OrderedDict
from datetime import date

import aiohttp
import twint
from dateutil.parser import parse
from num2words import num2words

user_input = input(
    "Six Degrees of Separation on Twitter \n"
    "There are 4 options: \n"
    "1. Single starting username (e.g. Person A) -> Person B (currently only working option)\n"
    "======REST IS UNDER CONSTRUCTION======\n"
    "2. Single search term (e.g. Users that have mentioned “Potato Chips”) or \n"
    "3. Multiple starting usernames (e.g. person a, person b, person c) \n"
    "4. Multiple starting username(s) and search term(s) \n"
    "Input a number: "
)

# Exceptions treated as "connection dropped, try again" by every downloader.
_TRANSIENT_ERRORS = (
    aiohttp.ClientConnectorError,
    aiohttp.ClientOSError,
    aiohttp.ServerDisconnectedError,
    asyncio.TimeoutError,
    concurrent.futures.TimeoutError,
)


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

def _data_path(filename):
    """Return the path of ``filename`` inside the ``data/`` download folder."""
    return 'data/' + filename


def _mentions_path(previoususer, user):
    """Return the CSV path holding tweets by ``previoususer`` that mention ``user``."""
    return _data_path(str(previoususer) + " mentioning " + str(user) + '.csv')


def _remove_if_exists(path):
    """Delete ``path`` when it exists; do nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def _read_csv_column(path, column):
    """Return the values of one column of a CSV file, skipping the header row."""
    with open(path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader, None)
        return [row[column] for row in reader]


def _count_csv_rows(path):
    """Return the number of rows after the header row of a CSV file."""
    with open(path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader, None)
        return sum(1 for _ in reader)


def _run_with_retry(task, config, partial_file=None, interrupt_cleanup=(),
                    interrupt_message=None):
    """Call ``task(config)`` until it finishes without a connection error.

    :param task: callable taking the twint config, e.g. ``twint.run.Search``
    :param config: twint configuration object passed to ``task``
    :param partial_file: file name inside ``data/`` that is deleted before
        each retry; ``None`` keeps any partial output and retries after one
        second (used by the downloads that set a resume file)
    :param interrupt_cleanup: paths deleted when Ctrl-C is pressed
    :param interrupt_message: when given, Ctrl-C is converted into
        ``ValueError(interrupt_message)``; when ``None`` it propagates
    :raises ValueError: on Ctrl-C, if ``interrupt_message`` was given
    """
    while True:
        try:
            task(config)
            return
        except _TRANSIENT_ERRORS:
            if partial_file is None:
                time.sleep(1)
                print('Cant connect to internet. Restarting...')
            else:
                print('Cant connect to internet. Retrying in 2 seconds...')
                if os.path.exists(_data_path(partial_file)):
                    print('Removing the incomplete file ' + partial_file)
                    os.remove(_data_path(partial_file))
                time.sleep(2)
        except KeyboardInterrupt:
            if interrupt_message is None:
                raise
            # The files may not have been created yet, so remove defensively.
            for path in interrupt_cleanup:
                _remove_if_exists(path)
            raise ValueError(interrupt_message)


# ---------------------------------------------------------------------------
# Downloaders (thin wrappers around twint)
# ---------------------------------------------------------------------------

def downloadfollowers(usource):
    """Download the followers of ``usource`` to ``data/<usource> followers.csv``.

    The download runs once, inside the retry loop, so connection errors on
    the first attempt are retried as well.
    """
    print("Downloading followers for " + str(usource))
    x = twint.Config()
    x.Username = str(usource.lower())
    x.Store_object = True
    x.Store_csv = True
    x.Resume = "data/" + str(usource) + " resume followers.csv"
    x.Output = "data/" + str(usource) + " followers.csv"
    _run_with_retry(twint.run.Followers, x)


def downloadfollowing(utarget):
    """Download the accounts ``utarget`` follows to ``data/<utarget> following.csv``.

    Any resume file left over from an earlier run is removed first.

    :raises ValueError: if the download is interrupted with Ctrl-C (the
        output and resume files are deleted before raising)
    """
    resume_path = "data/" + str(utarget) + " resume following.csv"
    output_path = "data/" + str(utarget) + " following.csv"
    _remove_if_exists(resume_path)
    print("Downloading who " + str(utarget) + " is following")
    y = twint.Config()
    y.Username = str(utarget.lower())
    y.Store_csv = True
    y.Resume = resume_path
    y.Output = output_path
    _run_with_retry(
        twint.run.Following, y,
        interrupt_cleanup=(output_path, resume_path),
        interrupt_message="Quit program. Deleted following and resume files for " + str(utarget) + ".",
    )


def downloadfavorites(userfavorites):
    """Download the favorites of ``userfavorites`` to ``data/<user> favorites.csv``.

    :raises ValueError: if the download is interrupted with Ctrl-C (the
        output file is deleted before raising)
    """
    print("Downloading favorites for: " + str(userfavorites))
    fv = twint.Config()
    fv.Username = str(userfavorites.lower())
    fv.Store_csv = True
    fv.Output = "data/" + str(userfavorites) + " favorites.csv"
    _run_with_retry(
        twint.run.Favorites, fv,
        partial_file=str(userfavorites) + " favorites.csv",
        interrupt_cleanup=(fv.Output,),
        interrupt_message="Quit program. Deleted favorites of " + str(userfavorites) + ".",
    )


def followerscount(frcount):
    """Look up the profile of ``frcount`` and store it in ``data/<user> frcount.csv``."""
    print("Checking followers count for: " + str(frcount))
    frc = twint.Config()
    frc.Username = str(frcount.lower())
    frc.Store_csv = True
    frc.Output = "data/" + str(frcount) + " frcount.csv"
    _run_with_retry(twint.run.Lookup, frc, partial_file=str(frcount) + " frcount.csv")


def followingcount(fcount):
    """Look up the profile of ``fcount`` and store it in ``data/<user> fcount.csv``.

    A ``KeyError`` from the lookup ends the attempt instead of being retried
    forever; callers detect the failure by the output file being absent.
    """
    print("Checking following count for: " + str(fcount))
    fc = twint.Config()
    fc.Username = str(fcount.lower())
    fc.Store_csv = True
    fc.Output = "data/" + str(fcount) + " fcount.csv"
    try:
        _run_with_retry(twint.run.Lookup, fc, partial_file=str(fcount) + " fcount.csv")
    except KeyError:
        exception = traceback.format_exc()
        if "KeyError: 'legacy'" in exception:
            print(f'Account {fcount} likely does not exist.')
        else:
            # Unknown lookup failure: report it rather than looping on it.
            print(exception)


def favoritescount(favcount):
    """Look up the profile of ``favcount`` and store it in ``data/<user> favcount.csv``."""
    print("Checking favorites count for: " + str(favcount))
    fvc = twint.Config()
    fvc.Username = str(favcount.lower())
    fvc.Store_csv = True
    fvc.Output = "data/" + str(favcount) + " favcount.csv"
    _run_with_retry(twint.run.Lookup, fvc, partial_file=str(favcount) + " favcount.csv")


def downloadtweets(usertweets):
    """Download up to 20000 tweets by ``usertweets`` to ``data/<user> tweets.csv``.

    :raises ValueError: if the download is interrupted with Ctrl-C (the
        output and resume files are deleted before raising)
    """
    print("Downloading tweets for: " + str(usertweets))
    dt = twint.Config()
    dt.Username = str(usertweets.lower())
    dt.Limit = 20000
    dt.Store_csv = True
    dt.Resume = "data/" + str(usertweets) + " resume tweets.csv"
    dt.Output = "data/" + str(usertweets) + " tweets.csv"
    _run_with_retry(
        twint.run.Search, dt,
        interrupt_cleanup=(dt.Resume, dt.Output),
        interrupt_message="Quit program. Deleted tweets by " + str(usertweets) + ".",
    )


def downloadtweets_by_specific_user_mentioning(x, y):
    """Download up to 10000 tweets written by ``x`` that match the search term ``y``.

    :param x: the user whose tweets are wanted
    :param y: the keyword to search for (the user expected to be mentioned)
    :raises ValueError: if the download is interrupted with Ctrl-C (the
        output and resume files are deleted before raising)
    """
    print("Downloading tweets by " + str(x) + " mentioning " + str(y))
    z = twint.Config()
    z.Search = str(y.lower())
    z.Username = str(x.lower())
    z.Limit = 10000
    z.Resume = "data/" + str(x) + " mentioning " + str(y) + ' resume file.csv'
    z.Store_csv = True
    z.Output = "data/" + str(x) + " mentioning " + str(y) + '.csv'
    _run_with_retry(
        twint.run.Search, z,
        interrupt_cleanup=(z.Resume, z.Output),
        interrupt_message="Quit program. Deleted tweets by " + str(x) + " mentioning " + str(y) + ".",
    )


# ---------------------------------------------------------------------------
# Lists built from the downloaded CSV files
# ---------------------------------------------------------------------------

def makefollowinglist(user):
    """Return the first CSV column of ``data/<user> following.csv`` as a list.

    The file is downloaded when missing. An empty list is returned when it
    still does not exist afterwards (the user is assumed not to exist), so
    callers can iterate the result unconditionally.
    """
    _remove_if_exists(_data_path(str(user) + ' resume following.csv'))
    print("Making following list for " + str(user))
    path = _data_path(str(user) + ' following.csv')
    if not os.path.exists(path):
        downloadfollowing(user)
    if not os.path.exists(path):
        print("Assuming user does not exist and skipping download")
        return []
    # Who the user is following (e.g. who Mojo Jojo is following).
    followinglist = _read_csv_column(path, 0)
    print('Saving following list for ' + str(user) + ' in an array')
    return followinglist


def makefollowerslist(user):
    """Return the first CSV column of ``data/<user> followers.csv`` as a list.

    The file is downloaded when missing; an empty list is returned when the
    download produced no file.
    """
    path = _data_path(str(user) + ' followers.csv')
    if not os.path.exists(path):
        print("Did not find " + str(user) + " csv file in data folder")
        # Start the download from scratch rather than from a stale cursor.
        _remove_if_exists("data/" + str(user) + " resume followers.csv")
        downloadfollowers(user)
    if not os.path.exists(path):
        return []
    followerslist = _read_csv_column(path, 0)
    print('Saving followers list for ' + str(user) + ' in an array')
    return followerslist


def makementionslist(user):
    """Collect every mention found in ``data/<user> tweets.csv``.

    :returns: ``(mentionscount, mentionslist)`` where ``mentionslist`` holds
        one entry per mention and ``mentionscount`` is an ``OrderedDict``
        of mention -> frequency, most frequent first. Both are empty when no
        tweets file exists after the download attempt.
    """
    print("Opening tweets CSV to count mentions...")
    path = _data_path(str(user) + " tweets.csv")
    if not os.path.exists(path):
        downloadtweets(user)
    mentionslist = []
    if os.path.exists(path):
        # Column 11 is read as the stringified list of mentions, e.g.
        # "['a', 'b']" -- NOTE(review): confirm against the twint CSV layout.
        for mention in _read_csv_column(path, 11):
            for name in mention.strip('][').replace("'", '').split(','):
                name = name.strip()
                if name and name != '[]':
                    mentionslist.append(name)
    print("Counting users mentioned by " + str(user) + "...")
    mentionscount = OrderedDict(Counter(mentionslist).most_common())
    return mentionscount, mentionslist


def makefavoriteslist(user):
    """Return column 7 of every row in ``data/<user> favorites.csv``.

    NOTE(review): column 7 is presumably the username of the favorited
    tweet's author -- confirm against the twint CSV layout.
    An empty list is returned when no favorites file can be obtained.
    """
    path = _data_path(str(user) + " favorites.csv")
    if not os.path.exists(path):
        print('Did not find ' + str(user) + ' favorites.csv file in data folder...')
        print('Downloading favorites for ' + str(user))
        downloadfavorites(user)
    if not os.path.exists(path):
        print("After attempted download, found no favorites, assuming there are none for " + str(user))
        return []
    favoriteduserslist = _read_csv_column(path, 7)
    print(str(favoriteduserslist))
    return favoriteduserslist


def makeinteractionslists(user, followinglist, favoriteslist, mentionslist):
    """Build the interaction tables for ``user``.

    :returns: ``(interactions, interactionsforonlyfollowing, favoritesdict)``:
        ``interactions`` maps every favorited, followed or mentioned user to
        the number of times ``user`` favorited them (0 when never);
        ``interactionsforonlyfollowing`` is the favorite count restricted to
        followed users; ``favoritesdict`` is the plain favorite count.
    """
    followinglist = followinglist or []  # tolerate None from older callers
    print('Saving favorited users for ' + str(user) + ' in a dict')
    print('Counting frequency of likes for every user liked by ' + str(user))
    interactions = OrderedDict(Counter(favoriteslist).most_common())
    favoritesdict = OrderedDict(Counter(favoriteslist).most_common())
    mentionsdict = OrderedDict(Counter(mentionslist).most_common())

    followed = set(followinglist)
    interactionsforonlyfollowing = OrderedDict(
        (name, count) for name, count in interactions.items() if name in followed
    )
    for name in followinglist:
        interactions.setdefault(name, 0)
    for name in mentionsdict:
        interactions.setdefault(name, 0)

    print("Following list (dict) is counted and is: " + str(interactions))
    with open('Saving interactions list for' + str(user) + '.txt', 'w') as f:
        f.write(str(interactions))
    with open('Saving interactions for following only list for' + str(user) + '.txt', 'w') as f:
        f.write(str(interactionsforonlyfollowing))
    return interactions, interactionsforonlyfollowing, favoritesdict


def lastdateprevioususerfavoriteduser(previoususer, user):
    """Return column 3 of the first favorites row of ``previoususer`` about ``user``.

    The string ``'None'`` is returned when there is no such row or no
    favorites file. NOTE(review): column 3 is presumably the tweet date and
    the file presumably lists newest favorites first -- confirm.
    """
    path = _data_path(str(previoususer) + " favorites.csv")
    if not os.path.exists(path):
        downloadfavorites(previoususer)
    if not os.path.exists(path):
        return 'None'
    latestfavoritedate = 'None'
    with open(path, 'r', newline='') as f:
        csv_reader = csv.reader(f, delimiter=',')
        next(csv_reader, None)
        for row in csv_reader:
            if str(row[7]) == str(user):
                latestfavoritedate = str(row[3])
                break
    print(user, 'was last favorited', latestfavoritedate)
    return latestfavoritedate


def getfollowingcount(user):
    """Refresh the profile lookup of ``user`` and return the following count.

    The count is column 9 of the last row of ``data/<user> fcount.csv``.
    ``None`` is returned when the lookup produced no usable row.
    """
    followingcount(user)
    path = _data_path(str(user) + ' fcount.csv')
    if not os.path.exists(path):
        return None
    followingnumber = _read_csv_column(path, 9)
    if not followingnumber:
        return None
    print(int(followingnumber[-1]))
    return int(followingnumber[-1])


def checkifuserexists(user):
    """Return True when a profile lookup file exists (or can be created) for ``user``."""
    path = _data_path(str(user) + ' fcount.csv')
    if not os.path.exists(path):
        followingcount(user)
    return os.path.exists(path)


def lastdateprevioususermentioneduser(previoususer, user):
    """Return column 3 of the first row of the ``previoususer`` -> ``user`` mentions file.

    The string ``'None'`` is returned when the file is missing or has no rows.
    """
    path = _mentions_path(previoususer, user)
    if not os.path.exists(path):
        downloadtweets_by_specific_user_mentioning(previoususer, user)
    if not os.path.exists(path):
        return 'None'
    mentioneddatelist = _read_csv_column(path, 3)
    if not mentioneddatelist:
        return 'None'
    latestmentiondate = mentioneddatelist[0]
    print(str(previoususer) + " last mentioned " + str(user) + ' on ' + str(latestmentiondate))
    return latestmentiondate


def counttimesprevioususermentioneduser(previoususer, user):
    """Return how many tweets by ``previoususer`` mention ``user``.

    :returns: the row count as ``int`` when the mentions file exists, or the
        string ``'0'`` when it does not (callers compare against ``str(0)``
        to tell "no file" from "empty file").
    """
    path = _mentions_path(previoususer, user)
    if not os.path.exists(path):
        downloadtweets_by_specific_user_mentioning(previoususer, user)
    if os.path.exists(path):
        mentionscount = _count_csv_rows(path)
        print("Counting how many times " + str(previoususer) + " mentioned " + str(user) + "...")
    else:
        print('Found no file in data folder called:' + str(previoususer) + " mentioning " + str(user) + '.csv' + ' Probability is mentions are 0. Setting mentions to 0.')
        mentionscount = str(0)
    return mentionscount


def checkifuserfollowedfavoritedmentionedbyprevioususer(user, previoususer, followedlist, favoritedlist, interacteddict):
    """Describe how ``previoususer`` is connected to ``user``.

    If the previous user is the target user (Mojo Jojo), ``user`` would be
    e.g. buttercup; if the previous user is buttercup, ``user`` would be
    bubbles, and so on.

    :returns: ``(connection_type, followed_by)`` where ``connection_type`` is
        a list drawn from ``'Follows'``, ``'Favorited'``, ``'Mentioned'`` and
        ``followed_by`` is ``'Yes'`` or ``'No'``.
    """
    connection_type = []
    if followedlist is not None and user in followedlist:
        connection_type.append('Follows')
        followed_by = 'Yes'
    else:
        followed_by = 'No'

    if favoritedlist is not None and user in favoritedlist:
        connection_type.append('Favorited')
        print("Found " + str(user) + " in " + str(previoususer) + "'s favorites")
        print(str(previoususer) + " has liked " + str(user) + ' ' + str(interacteddict.get(user)) + ' time(s).')

    # NOTE(review): reconstructed as running regardless of the favorites
    # check above; the original nesting was lost -- confirm.
    path = _mentions_path(previoususer, user)
    if not os.path.exists(path):
        downloadtweets_by_specific_user_mentioning(previoususer, user)
    if os.path.exists(path):
        connection_type.append("Mentioned")
    return connection_type, followed_by


def comparefollowinglistwithactualnumber(user):
    """Re-download the following list of ``user`` when it looks out of date.

    The number of rows in ``data/<user> following.csv`` is compared with the
    following count of a fresh profile lookup; a difference of more than 2
    triggers a new download. Files are looked up in ``data/``, where the
    download helpers write them.
    """
    following_path = _data_path(str(user) + ' following.csv')
    count_path = _data_path(str(user) + ' fcount.csv')
    if not os.path.exists(following_path):
        downloadfollowing(user)
    else:
        print('i plan to delete the existing file and download a new one')
    if not os.path.exists(following_path):
        downloadfollowing(user)
        return

    print('Found ' + str(user) + " following.csv in current folder")
    followingcount(user)
    print("Found following list for " + str(user))
    print("Going to compare with current number")
    fcounter = _read_csv_column(count_path, 9) if os.path.exists(count_path) else []
    if not fcounter:
        # The lookup produced no count, so there is nothing to compare with.
        return
    print(int(fcounter[-1]))
    row_count1 = _count_csv_rows(following_path)
    if abs(int(row_count1) - int(fcounter[-1])) <= 2:
        print('Following count is relatively the same (give or take 2)')
        print('row_count from existing CSV is : ' + str(row_count1) + ' and fcounter is: ' + str(fcounter[-1]))
    else:
        print('Following count not the same\n' + 'row_count1 is ' + str(row_count1) + '\nfcounter is ' + str(fcounter[-1]))
        print('Downloading following for ' + str(user))
        downloadfollowing(user)


def skipif_followinglist_doesntexist(user):
    """Return True when no following list can be obtained for ``user``.

    Skipped users are appended to a text file named after the module-level
    ``user_target`` and ``user_source``.
    """
    path = _data_path(str(user) + " following.csv")
    if not os.path.exists(path):
        downloadfollowing(user)
    if os.path.exists(path):
        return False
    print("Did not find " + str(user) + " following.csv in the current folder")
    with open('Users skipped due to followingcsv not found for' + str(user_target) + ' and ' + str(user_source) + '.txt', 'a') as f:
        f.write(str([user]))
    return True


def skipuseriffollowingtoomanyandmentionslow(previoususer, user, followingcount):
    """Decide whether ``user`` should be skipped based on following/mention counts.

    :param followingcount: number of accounts ``user`` follows, or ``None``
    :returns: ``True`` only when the count is in (7000, 30000] and
        ``previoususer`` mentioned ``user`` more than 10 times; ``False`` for
        the other handled cases; ``None`` when the count is 7000 or lower or
        there are exactly 10 mentions (the original has no branch for these).
        NOTE(review): the True/False sense looks inverted relative to the
        function name -- confirm the intent before changing it.
    """
    if previoususer == user:
        return False
    if followingcount is None:
        return False
    if followingcount <= 7000:
        return None
    if followingcount > 30000:
        return False
    countedrows = counttimesprevioususermentioneduser(previoususer, user)
    if countedrows == str(0):
        print("Following is greater than 7,000. Appending to list to use later but using only likes and mention frequencies")
        with open('Users skipped due to their following being greater than 7K for 3rd degree - ' + str(user_target) + ' and ' + str(user_source) + '.txt', 'a') as f:
            f.write('\n' + str(user))
        return False
    if int(countedrows) < 10:
        return False
    if int(countedrows) > 10:
        return True
    return None


def skipifuserisprivate(user):
    """Return True when no profile lookup file can be obtained for ``user``."""
    path = _data_path(str(user) + ' fcount.csv')
    if not os.path.exists(path):
        followingcount(user)
    if os.path.exists(path):
        return False
    print("Didn't find " + str(user))
    print("fcount file not found. Assuming it's non-existent and skipping.")
    return True


def downloadfollowingifnotthere(user):
    """Download the following list of ``user`` unless it is already in ``data/``."""
    if not os.path.exists(_data_path(str(user) + " following.csv")):
        downloadfollowing(user)


def is_date(string, fuzzy=False):
    """
    Return whether the string can be interpreted as a date.

    :param string: str, string to check for date
    :param fuzzy: bool, ignore unknown tokens in string if True
    """
    try:
        parse(string, fuzzy=fuzzy)
        return True
    except (ValueError, OverflowError):
        return False


def dontaddheaderiffileexists(numberofdegree):
    """Return True when the results CSV for the given degree already exists.

    The file name uses the module-level ``user_target`` and ``user_source``.
    """
    ordinal = str(num2words(numberofdegree, to='ordinal').capitalize())
    return os.path.exists('results/' + ordinal + ' Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv')


# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------

def interactionscore(a, b):
    """Return how balanced two counts are, from 0 (one-sided) to 1 (equal).

    :returns: the integer ``0`` when the score is zero, otherwise the score
        formatted with six decimals as a string (e.g. ``'0.500000'``).
    """
    if int(a) == 0 and int(b) == 0:
        return 0
    a = float(a)
    b = float(b)
    if max(a, b) <= 0:
        return 0
    result = format(float(-(abs((a - b) / max(a, b)) - 1)), 'f')
    if float(result) <= 0:
        return 0
    return result


def generatefollowingfavoritementionsinteractionslists(user):
    """Build every per-user list and table used by the search.

    :returns: ``(following list, favorites list, mentions count, mentions
        list, interactions, interactions restricted to following, favorites
        count)``
    """
    print("Phase 1: Making following list for " + str(user))
    userfollowinglist = makefollowinglist(user)
    print("Phase 2: Making interactions list for " + str(user) + ". Counting favorited users.")
    userfavorites = makefavoriteslist(user)
    usermentionscount, usermentionslist = makementionslist(user)
    userinteractions, userinteractionsandfollowing, favorites_dict = makeinteractionslists(
        user, userfollowinglist, userfavorites, usermentionslist)
    return (userfollowinglist, userfavorites, usermentionscount, usermentionslist,
            userinteractions, userinteractionsandfollowing, favorites_dict)


def score_mentions_number(x):
    """Map a mention count to [0, 1): ``1 - 0.99 ** (8 * x)``."""
    a = 1
    n = 8
    b = 0.01
    return -(a * (1 - b) ** (n * float(x)) - 1)


def score_recency(x):
    """Map a number of elapsed days to (0, 1]: ``0.994 ** (1.001 * x)``."""
    a = 1
    n = 1.001
    b = 0.006
    return a * (1 - b) ** (n * float(x))


def get_mentionsfile_creation_date(user1, user2):
    """Return the last-modified date (``YYYY-MM-DD``) of the mentions file, or None."""
    print("Mentions file creation date for " + str(user1) + " and " + str(user2))
    path = _data_path(str(user1) + ' mentioning ' + str(user2) + '.csv')
    if not os.path.exists(path):
        creationdate = None
    else:
        creationdate = datetime.datetime.fromtimestamp(os.path.getmtime(path)).strftime('%Y-%m-%d')
    print("is " + str(creationdate))
    return creationdate


def subtract_days(filecreationdate, lastmentiondate):
    """Return the whole days between two ``YYYY-MM-DD`` date strings.

    ``None`` is returned when either date is missing; the string ``'None'``
    (used as a sentinel elsewhere in this module) counts as missing.
    """
    if not lastmentiondate or lastmentiondate == 'None':
        return None
    if not filecreationdate or filecreationdate == 'None':
        return None
    created = datetime.datetime.strptime(filecreationdate, '%Y-%m-%d')
    mentioned = datetime.datetime.strptime(lastmentiondate, '%Y-%m-%d')
    return (created - mentioned).days


def get_favoritesfile_creation_date(user):
    """Return the last-modified date (``YYYY-MM-DD``) of the favorites file, or None."""
    print("Favorites file creation date for " + str(user))
    path = _data_path(str(user) + " favorites.csv")
    if not os.path.exists(path):
        creationdate = None
    else:
        creationdate = datetime.datetime.fromtimestamp(os.path.getmtime(path)).strftime('%Y-%m-%d')
    print("is " + str(creationdate))
    return creationdate


def getreinforcementpercentxrecency(previoususer, user, reinforcementpercent):
    """Return ``reinforcementpercent`` weighted by how recently ``user`` was favorited.

    The result is 0 when ``previoususer`` never favorited ``user``.
    """
    # Read the favorites file once; each call re-parses the whole CSV.
    lastfavorited = lastdateprevioususerfavoriteduser(previoususer, user)
    score_recency_stored_days = 0
    if str(lastfavorited) != 'None':
        days = subtract_days(get_favoritesfile_creation_date(previoususer), lastfavorited)
        if days is not None:
            score_recency_stored_days = score_recency(days)
    return float(reinforcementpercent) * score_recency_stored_days


def weighted_interaction_score(a, b, c, d):
    """Score the mutual interaction of two users.

    :param a: times user 1 mentioned user 2
    :param b: times user 2 mentioned user 1
    :param c: days since user 1 last mentioned user 2, or None
    :param d: days since user 2 last mentioned user 1, or None
    :returns: balance of ``a`` and ``b`` scaled by the mention-count and
        recency scores; the integer ``0`` when the score is zero
    """
    a = float(a)
    b = float(b)
    if c is None:
        c = 0
        score_recency_stored_c = 0
    else:
        score_recency_stored_c = score_recency(c)
    if d is None:
        d = 0
        score_recency_stored_d = 0
    else:
        score_recency_stored_d = score_recency(d)
    print('User 1 mentioned User 2: ', str(int(a)), ' times')
    print('User 2 mentioned User 1: ', str(int(b)), ' times')
    print('User 1 last mentioned User 2: ', str(int(c)), ' days ago')
    print('User 2 last mentioned User 1: ', str(int(d)), ' days ago')

    # NOTE(review): the original first zeroed the score for a <= 1 and
    # b <= 1 and then overwrote it whenever either count was non-zero, so
    # only the all-zero case actually scored 0. That behavior is kept here;
    # confirm whether pairs with at most one mention each were meant to
    # score 0.
    if max(a, b) <= 0:
        result = 0
    else:
        # format(..., 'f') rounds the balance to six decimals, as before.
        result = float(format(float(-(abs((a - b) / max(a, b)) - 1)), 'f'))
        if result <= 0:
            result = 0
    result = float(result) * score_mentions_number(a)
    result = float(result) * score_mentions_number(b)
    result = float(result) * score_recency_stored_c
    result = float(result) * score_recency_stored_d
    print("Interaction score is: ", float(result))
    if result == 0:
        result = 0
    return result


def get_weighted_interactions_score_light(user1, user2, countoftimesuser1mentioneduser2, countoftimesuser2mentioneduser1, lastmentioneddate_foruser1touser2, lastmentioneddate_foruser2touser1):
    """Compute the weighted interaction score from already-known counts and dates.

    Only the modification dates of the two mentions files are read here.
    """
    creationdate_foruser1touser2 = get_mentionsfile_creation_date(user1, user2)
    if creationdate_foruser1touser2 is None:
        days_since_user1_mentioned_user2 = None
    else:
        days_since_user1_mentioned_user2 = subtract_days(creationdate_foruser1touser2, lastmentioneddate_foruser1touser2)
    creationdate_foruser2touser1 = get_mentionsfile_creation_date(user2, user1)
    if creationdate_foruser2touser1 is None:
        days_since_user2_mentioned_user1 = None
    else:
        days_since_user2_mentioned_user1 = subtract_days(creationdate_foruser2touser1, lastmentioneddate_foruser2touser1)
    return weighted_interaction_score(
        float(countoftimesuser1mentioneduser2),
        float(countoftimesuser2mentioneduser1),
        days_since_user1_mentioned_user2,
        days_since_user2_mentioned_user1,
    )


def get_weighted_interactions_score(user1, user2):
    """Compute the weighted interaction score, downloading mention data as needed."""
    # a / b: how often each user mentioned the other
    countoftimesuser1mentioneduser2 = counttimesprevioususermentioneduser(user1, user2)
    countoftimesuser2mentioneduser1 = counttimesprevioususermentioneduser(user2, user1)

    # c: days since user 1 last mentioned user 2
    lastmentioneddate_foruser1touser2 = lastdateprevioususermentioneduser(user1, user2)
    creationdate_foruser1touser2 = get_mentionsfile_creation_date(user1, user2)
    if creationdate_foruser1touser2 is None:
        days_since_user1_mentioned_user2 = None
    else:
        days_since_user1_mentioned_user2 = subtract_days(creationdate_foruser1touser2, lastmentioneddate_foruser1touser2)

    # d: days since user 2 last mentioned user 1
    lastmentioneddate_foruser2touser1 = lastdateprevioususermentioneduser(user2, user1)
    creationdate_foruser2touser1 = get_mentionsfile_creation_date(user2, user1)
    if creationdate_foruser2touser1 is None:
        days_since_user2_mentioned_user1 = None
    else:
        days_since_user2_mentioned_user1 = subtract_days(creationdate_foruser2touser1, lastmentioneddate_foruser2touser1)

    return weighted_interaction_score(
        countoftimesuser1mentioneduser2,
        countoftimesuser2mentioneduser1,
        days_since_user1_mentioned_user2,
        days_since_user2_mentioned_user1,
    )


def getreinforcementpercentage(user, previoususerfavorites_ordereddict):
    """Return the share of all favorites that went to ``user`` (0.0 when none)."""
    if user not in previoususerfavorites_ordereddict:
        return 0.0
    sumoffavoritesnumbers = sum(previoususerfavorites_ordereddict.values())
    if sumoffavoritesnumbers == 0:
        return 0.0
    return previoususerfavorites_ordereddict.get(user) / sumoffavoritesnumbers
sumoffavoritesnumbers = sum(previoususerfavorites_ordereddict.values()) return previoususerfavorites_ordereddict.get(user) / sumoffavoritesnumbers #return -((previoususerfavorites_ordereddict.get(user) / sumoffavoritesnumbers) - 1) #return -((list(previoususerfavorites_ordereddict.keys()).index(user) / len(list(previoususerfavorites_ordereddict.keys()))) -1) listoffound = [] if int(user_input) == 1: if not os.path.exists('data/'): os.mkdir('data/') if not os.path.exists('results/'): os.mkdir('results/') #print("Import CSV (csv has to be 1 column of users with 'username' header)", [...]) #Get starting username (e.g. 'person a') user_source = input("Please input starting username (e.g. if you want to reach Person B from Person A, enter Person A’s username. If you want to reach Person A from Person B, enter Person B's username): " ) print("Starting username is:" + str(user_source)) #Get target username (e.g.'mojojojo') user_target = input("Please input target username (e.g. if you want to reach Person B from Person A, enter Person B's username. If you want to reach Person A from Person B, enter Person A's username): " ) print("Target username to reach is:" + str(user_target)) whichdegree = input('Input a number: \n1. 1st Degree? \n2. 2nd degree? \n3. 3rd degree?\n4. 4th degree?\n5. 5th degree? ') #if whichdegree != int(1): if os.path.exists('results/' + 'First Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): #whichdegree = input('Input a number: \n1. 1st Degree? \n2. 2nd degree? \n3. 3rd degree?\n4. 4th degree?\n5. 5th degree? 
') skipfirst = int(whichdegree) if int(skipfirst) == 1: #continue adding to first degree skipfirst = 1 skipthird = False if int(skipfirst) == 2: #skip first degree only skipfirst = 2 skipthird = False if int(skipfirst) == 3: #skip first and second degree and go to third degree skipfirst = 3 skipsecond = 2 skipthird = False print("Skipping 1st and 2nd degree...") if int(skipfirst) == 4: skipfirst = 4 skipsecond = 2 skipthird = True skipfourth = False if int(skipfirst) == 5: skipfirst = 5 skipsecond = 2 skipthird = True skipfourth = True if skipfirst == 1: if os.path.exists('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): skipsecond = input("\n2nd Degree for " + str(user_target) + " and " + str(user_source) + '.csv' + ' found. Input a number: 1. Continue adding to it or 2. skip to 3rd degree? : ') if int(skipsecond) == 1: print("Will continue adding to second degree for " + str(user_target) + " and " + str(user_source)) if int(skipsecond) == 2: print("Will skip second degree for " + str(user_target) + " and " + str(user_source)) skipthird = False elif not os.path.exists('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): skipsecond = 1 if skipfirst == 2: if os.path.exists('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): skipsecond = input("\n2nd Degree for " + str(user_target) + " and " + str(user_source) + '.csv' + ' found. Input a number: 1. Continue adding to it or 2. skip to 3rd degree? 
: ') if int(skipsecond) == 1: print("Will continue adding to second degree for " + str(user_target) + " and " + str(user_source)) if int(skipsecond) == 2: print("Will skip second degree for " + str(user_target) + " and " + str(user_source)) elif not os.path.exists('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): skipsecond = 1 if int(skipfirst) <= 3: if os.path.exists('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV columnofusers_thirddegree = [] for row in csv_reader: columnofusers_thirddegree.append(str(row[2])) skiplastuser = input('\n\n3rd Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv' + ' found.\n' + '\nLast username: ' + str(columnofusers_thirddegree[len(columnofusers_thirddegree) -1]) + '\n' + '\nInput a number:\n' + '1. Continue adding to ' + str(columnofusers_thirddegree[len(columnofusers_thirddegree) -1]) + '\n2. skip them? 
: ') elif not os.path.exists('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): skiplastuser = 1 if int(skipfirst) == 4: if os.path.exists('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV columnofusers_fourthdegree_userone = [] columnofusers_fourthdegree_usertwo = [] for row in csv_reader: columnofusers_fourthdegree_userone.append(str(row[2])) columnofusers_fourthdegree_usertwo.append(str(row[12])) skiplastuser_fourthdegree = int(input('\n\n4th Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv' + ' found.\n' + '\nLast User 1: ' + str(columnofusers_fourthdegree_userone[len(columnofusers_fourthdegree_userone) -1]) + '\n' + '\nInput a number:\n' + '1. Continue adding to last User 1: ' + str(columnofusers_fourthdegree_userone[len(columnofusers_fourthdegree_userone) -1]) + '\n2. Skip last user 1?' + '\n3. Continue adding to last User 2: ' + str(columnofusers_fourthdegree_usertwo[len(columnofusers_fourthdegree_usertwo) -1]) + '\n4. Skip last user 2: ' + str(columnofusers_fourthdegree_usertwo[len(columnofusers_fourthdegree_usertwo) -1]) + '?' 
)) if int(skipfirst) == 5: if os.path.exists('results/' + 'Fifth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Fifth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV columnofusers_fifthdegree_userone = [] columnofusers_fifthdegree_usertwo = [] columnofusers_fifthdegree_userthree = [] for row in csv_reader: columnofusers_fifthdegree_userone.append(str(row[2])) columnofusers_fifthdegree_usertwo.append(str(row[12])) columnofusers_fifthdegree_userthree.append(str(row[22])) skiplastuser_fifthdegree = input('\n\n5th Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv' + ' found.\n' + '\nLast User 1: ' + str(columnofusers_fifthdegree_userone[len(columnofusers_fifthdegree_userone) -1]) + '\n' + '\nInput a number:\n' + '1. Continue adding to last User 1: ' + str(columnofusers_fifthdegree_userone[len(columnofusers_fifthdegree_userone) -1]) + '\n2. Skip last user 1?' + '\n3. Continue adding to last User 2: ' + str(columnofusers_fifthdegree_usertwo[len(columnofusers_fifthdegree_usertwo) -1]) + '\n4. Skip last user 2: ' + str(columnofusers_fifthdegree_usertwo[len(columnofusers_fifthdegree_usertwo) -1]) + '?' + '\n5. Continue adding to last User 3: ' + str(columnofusers_fifthdegree_userthree[len(columnofusers_fifthdegree_userthree) -1]) + '\n6. Skip last user 3: ' + str(columnofusers_fifthdegree_userthree[len(columnofusers_fifthdegree_userthree) -1]) + '?' ) else: skipfirst = '90' skipsecond = 1 #USER TARGET: Check if there's a csv file with a list of all the target user (e.g. 
Person B) is following if os.path.exists('data/' + str(user_target) + " following.csv"): print('Found ' + str(user_target) + " following.csv in current folder") followingcount(user_target) print("Found following list for " + str(user_target)) print("Going to compare with current number") #Open counted following csv with open('data/' + str(user_target) + ' fcount.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) fcounter = [] for row in csv_reader: fcounter.append(row[9]) print(int(fcounter[-1])) with open('data/' + str(user_target) + ' following.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV row_count1 = sum(1 for row in csv_reader) #compare the number of rows (users - row_count) in the CSV with the number of current followers (from twint - frcounter) relatively (if they are within 10 followers) if abs(int(row_count1) - int(fcounter[-1])) <= 2: #if math.isclose(row_count1,int(fcounter[0]), rel_tol=2): print('Following count is relatively the same (give or take 2)') print('row_count from existing CSV is : ' + str(row_count1) + ' and fcounter is: ' + str(fcounter[-1])) #if the number of rows is not within 2 of the current number of followers, download a CSV of followers else: print('Following count not the same' + 'row_count1 is ' + str(row_count1) + 'fcounter is ' + str(fcounter[-1])) print("Removing existing following file...") os.remove('data/' + str(user_target) + " following.csv") print('Downloading following for ' + str(user_target)) downloadfollowing(user_target) else: downloadfollowing(user_target) #USER TARGET - Check if CSV file of tweets is in the folder for the target user (why? to count mentions and check if user was mentioned by target user (e.g. 
Person B)) #downloadtweets(user_target) targetusermentionscount, targetusermentionslist = makementionslist(user_target) targetuserfollowinglist = makefollowinglist(user_target) print("Phase 2: Making following list for " + str(user_target) + ". Counting favorited users.") targetuserfavoriteslist = makefavoriteslist(user_target) print('Target user favorited users:') print(targetuserfavoriteslist) targetuserinteractions, countedandfollowing, targetuserfavorites_ordereddict = makeinteractionslists(user_target,targetuserfollowinglist,targetuserfavoriteslist,targetusermentionslist) #USER SOURCE - Check if the CSV file of followers for the inputted user source (e.g. person a) exists if os.path.exists('data/' + str(user_source) + " followers.csv"): followerscount(user_source) with open('data/' + str(user_source) + ' frcount.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) frcounter = [] for row in csv_reader: frcounter.append(row[10]) print(int(frcounter[0])) with open('data/' + str(user_source) + ' followers.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV row_count = sum(1 for row in csv_reader) print('row_count is ' + str(row_count)) print('frcounter is ' + str(frcounter[0])) #compare the number of rows (users - row_count) in the CSV with the number of current followers (from twint - frcounter) relatively (if they are within 10 followers) #if abs(int(row_count) - int(frcounter[0])) <= 50: if abs(int(row_count) - int(frcounter[0])) <= 100: print('Follower count is relatively the same (give or take 10)') print('row_count from existing CSV is : ' + str(row_count) + ' and frcounter is: ' + str((frcounter[0]))) #if the number of rows is not within 10 of the current number of followers, download a CSV of followers else: print('Follower count not the same' + 'row_count is ' + str(row_count) + 'frcounter is ' + str(frcounter)) print('Downloading followers for ' + 
str(user_source)) print("Moving " + str(user_source) + " followers.csv") os.rename('data/' + str(user_source) + " followers.csv", 'death/' + str(user_source) + " followers.csv") downloadfollowers(user_source) else: print("Did not find " + str(user_source) + " csv file in current folder") downloadfollowers(user_source) sourceuserfollowerslist = makefollowerslist(user_source) header_added1 = dontaddheaderiffileexists(1) header_added2 = dontaddheaderiffileexists(2) header_added3 = dontaddheaderiffileexists(3) header_added4 = dontaddheaderiffileexists(4) header_added5 = dontaddheaderiffileexists(5) header_added = False header_added2 = False #header_added4 = False notfound = [] alreadyinsecond = [] checker1 = [] checker2 = [] checker3 = [] for u in targetuserinteractions: if int(skipfirst) == 2 or int(skipfirst) == 3: if u in targetuserinteractions: continue if u not in sourceuserfollowerslist: print(str(u) + " not found in" + str(user_source) + "'s followers") continue if u in sourceuserfollowerslist: s = u listoffound.append(s) continue #print('listoffound: ', listoffound) for e in listoffound: s = e if os.path.exists('results/' + 'First Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'First Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='r') as csv_reader: checker = [] for row in csv_reader: #Open in the 11th column to see number of likes checker.append(row[2]) if s in checker: continue reinforcementpercentage = list(targetuserinteractions.keys()).index(s) / len(list(targetuserinteractions.keys())) reinforcementpercent = -(reinforcementpercentage - 1) connectiontype = [] if s in targetuserfollowinglist: connectiontype.append('Follows') followedby = 'Yes' elif s not in targetuserfollowinglist: followedby = 'No' if s in targetusermentionscount or targetusermentionslist: connectiontype.append("Mentioned") print("Found " + str(s) + "in mentions (targetusermentionslist)") if s in 
targetuserfavoriteslist: connectiontype.append('Favorited') print("Found " + str(s) + " in favorited users (targetuserfavoriteslist)") print(str(user_target) + " has liked " + str(s) + ' ' + str(targetuserinteractions.get(s)) + ' time(s).') if reinforcementpercent > 0.97: likesbyS = "Pending" if reinforcementpercent < 0.97: likesbyS = 'Not counting. Reinforcement for ' + str(user_target) + ' too low.' if s not in targetuserfavoriteslist: likesbyS = '0' #if s in targetuserinteractionsandfollowing: #followedby = 'Yes' #elif s not in countedandfollowing: #followedby = 'No' if s in sourceuserfollowerslist: follows_source = 'Follows' elif s not in sourceuserfollowerslist: follows_source = "Doesn't follow" if s in countedandfollowing: #if one of the users is being followed by Person B #connectiontype.append('Follows') print('Found something...' + '\nSaving non-reinforcement factored in list in txt file') print(str(user_target) + ' ' + str(connectiontype) + ' ' + str(s) + ' who is following ' + str(user_source)) f = open('data/' + 'First Degree of ' + str(user_source) + ' to ' + str(user_target) + '.txt', 'a') f.write(str(user_target) + ' ' + str(connectiontype) + ' ' + str(s) + ' who is following ' + str(user_source) + '\n') f.close() elif s in targetuserfollowinglist and targetuserinteractions and reinforcementpercent > 0.98: print('Found something with higher than 98 reinforcement...' + '\nSaving non-reinforcement factored in list in txt file') print(str(user_target) + ' ' + str(connectiontype) + ' ' + str(s) + ' who is following ' + str(user_source)) f = open('data/' + 'First Degree of ' + str(user_source) + ' to ' + str(user_target) + '.txt', 'a') f.write(str(user_target) + ' ' + str(connectiontype) + ' ' + str(s) + ' who is following ' + str(user_source) + '\n') f.close() print("Now factoring in reinforcement... 
") print("List of found: " + str(listoffound)) downloadtweets_by_specific_user_mentioning(user_target, s) if os.path.exists('data/' + str(user_target) + " mentioning " + str(s) + '.csv'): with open('data/' + str(user_target) + " mentioning " + str(s) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV row_countFT = sum(1 for row in csv_reader) print("Counting how many times " + str(user_target) + " has mentioned " + str(s) + "...") else: print("Found no folder called " + str(user_target) + " mentioning " + str(s) + '.csv\n' + 'Probability is mentions are 0, setting to 0.') row_countFT = str(0) downloadtweets_by_specific_user_mentioning(str(s), str(user_source)) if os.path.exists('data/' + str(s) + " mentioning " + str(user_source) + '.csv'): with open('data/' + str(s) + " mentioning " + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) row_countS = sum(1 for row in csv_reader) elif not os.path.exists('data/' + str(s) + " mentioning " + str(user_source) + '.csv'): print("Found no folder called " + str(s) + " mentioning " + str(user_source) + '.csv\n' + 'Probability is mentions are 0, setting to 0.') row_countS = str(0) notfound.append(s) else: print("Found no folder called " + str(s) + " mentioning " + str(user_source) + '.csv\n' + 'Probability is mentions are 0, setting to 0.') row_countS = str(0) with open('results/' + 'First Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='a') as csv_writer: csv_writer = csv.writer(csv_writer, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) if not header_added1: csv_writer.writerow(['Target Username', '>','User 1','Times favorited by ' + str(user_target), 'Followed by ' + str(user_target) + '?', 'Times mentioned by ' + str(user_target), 'Reinforcement Rank', 'Reinforcement Rank Out of Number of Liked Users','Reinforcement Percentage','>','Source User', 'Times ' + 
str(user_source) + ' was favorited by User 1', 'Reinforcement Rank', 'Times User 1 mentioned ' + str(user_source)]) header_added1 = True #dont know if next line particularly counted[s] will work - goal is to find value for every specific key found / cross referenced - for reinforcement rank i need some kind of enumeration / enumerate thing that indexes every key or transforms it into an array then indexes it csv_writer.writerow([user_target, str(connectiontype).replace("[","").replace("]","").replace("'",""), str(s), str(targetuserinteractions.get(s)), followedby, str(targetusermentionscount.get(s)), str(list(targetuserinteractions.keys()).index(s)), str(list(targetuserinteractions.keys()).index(s)) + '/' + str(len(list(targetuserinteractions.keys()))), reinforcementpercent, follows_source, str(user_source), likesbyS, 'Pending', str(row_countS)]) print("Checking if " + str(s) + " favorites.csv is in the current folder...") continue for e in listoffound: s = e if os.path.exists('data/' + str(s) + " favorites.csv"): print("Found " + str(s) + " favorites.csv in current folder") #Now download the number of likes of the user s to compare to the number of rows in the CSV of favorites favoritescount(s) with open('data/' + str(s) + ' favcount.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #create an array called 'favcounter' to store number of likes in favcounter = [] for row in csv_reader: #Open in the 11th column to see number of likes favcounter.append(row[11]) #print an integer of the number of likes you found after downloading it print("Current number of favorites by user " + str(s) + ' is:') print(int(favcounter[0])) #Now open user s favorites CSV and count the number of rows with open('data/' + str(s) + ' favorites.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV row_count2 = sum(1 for row in csv_reader) print("Number of favorited tweets by user " + str(s) + ' in 
the current CSV file is:') print(str(row_count2)) #if both numbers are relatively close (within 50) - do this: print("Comparing numbers... (Checking to see if they're within 50 favorites)", [...]) if abs(int(row_count2) - int(favcounter[0])) <= 50: print('Favorite count is relatively the same (give or take 50 favorites)') print('row_count from existing CSV is : ' + str(row_count2) + 'and favcounter is: ' + str(favcounter)) #if the number of rows is not within 50 of the current number of favorites, download a CSV of followers else: print('Favorite count not the same (give or take 50 favorites) ' + 'row_count2 is ' + str(row_count2) + 'favcounter is ' + str(favcounter)) print("Checking if it's worth it to download by approximating " + str(user_target) + "'s reinforcement towards account'") with open('data/' + str(s) + ' favorites.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) fvucounterS = [] for row in csv_reader: fvucounterS.append(row[7]) print('Saving favorited users for ' + str(s) + ' in a dict') f = open('Savingfavusersdict2S.txt', 'w') f.write(str(fvucounterS)) f.close() print(str(fvucounterS)) print('Counting frequency of likes for every user liked by ' + str(s)) countedX = OrderedDict(Counter(fvucounterS).most_common()) likesbyS = countedX.get(user_source) else: print('Did not find ' + str(s) + ' favorites.csv file in current folder...') print("Checking if it's worth it to download by approximating " + str(user_target) + "'s reinforcement towards account'") reinforcementpercentage = list(targetuserinteractions.keys()).index(s) / len(list(targetuserinteractions.keys())) reinforcementpercent = -(reinforcementpercentage - 1) if reinforcementpercent < 0.80: #too few likes by target user to make it worth counting. 
Less than 80% likesbyS = str(s) + ' liked too few times by ' + str(user_target) elif reinforcementpercent > 0.80: downloadfavorites(s) #Reinforcement: Open s's favorites, find usernames column, save to an Ordered Dict, count and sort liked users by frequency of occurence in an ordered Dict with open('data/' + str(s) + ' favorites.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) fvucounterS = [] for row in csv_reader: fvucounterS.append(row[7]) print('Saving favorited users for ' + str(s) + ' in a dict') f = open('Savingfavusersdict2S.txt', 'w') f.write(str(fvucounterS)) f.close() print(str(fvucounterS)) print('Counting frequency of likes for every user liked by ' + str(s)) countedX = OrderedDict(Counter(fvucounterS).most_common()) likesbyS = countedX.get(user_source) #downloadfavorites(s) downloadfollowing(s) print("Phase 1: Making following list for " + str(s)) #Open user s following CSV file and save every row after header to an array with open('data/' + str(s) + ' following.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) slist = [] for row in csv_reader: slist.append(row[0]) print('Saving following list for ' + str(s) + ' in an array') print("Phase 2: Now making following list for " + str(s) + ". 
Counting favorited users.") with open('data/' + str(s) + ' favorites.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) targetuserfavoriteslist = [] for row in csv_reader: targetuserfavoriteslist.append(row[7]) print('Saving favorited users for ' + str(s) + ' in a dict') print(str(targetuserfavoriteslist)) print('Counting frequency of likes for every user liked by ' + str(s)) counteds = OrderedDict(Counter(targetuserfavoriteslist).most_common()) for u in slist: if u not in counteds: counteds.update({u: 0}) print("List (dict) of users " + str(s) + " is following and how many times " + str(s) + " liked those users:") print(counteds) with open('results/' + 'First Degree - Reinforcement Factored.csv', mode='a') as csv_writer: csv_writer = csv.writer(csv_writer, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) if header_added2 == False: csv_writer.writerow(['Target Username', 'Follows','User 1','Times favorited by ' + str(user_target),'Reinforcement Rank','Follows','Source User', 'Times favorited by ' + str(s), 'Reinforcement Rank']) header_added2 = True #dont know if next line particularly counted[s] will work - goal is to find value for every specific key found / cross referenced - for reinforcement rank i need some kind of enumeration / enumerate thing that indexes every key or transforms it into an array then indexes it csv_writer.writerows([user_target, '>', str(s), str(targetuserinteractions.get(s)), 'Reinforcement rank pending', '>', str(user_source), targetuserinteractions.get(user_source)]) reinforcementpercentage = list(targetuserinteractions.keys()).index(s) / len(list(targetuserinteractions.keys())) reinforcementpercent = -(reinforcementpercentage - 1) if reinforcementpercent < 0.80: likesbyS = str(s) + ' liked too few times by ' + str(user_target) elif reinforcementpercent > 0.80: if os.path.exists('data/' + str(s) + " favorites.csv"): with open('data/' + str(s) + ' favorites.csv', 'r') as f: csv_reader = csv.reader(f, 
delimiter=',') next(csv_reader, None) fvucounterS = [] for row in csv_reader: fvucounterS.append(row[7]) print('Saving favorited users for ' + str(s) + ' in a dict') #save dict in text file print('Saving in text file') f = open('SavingfavusersdictS.txt', 'w') f.write(str(fvucounterS)) f.close() print(str(fvucounterS)) print('Counting frequency of likes for every user liked by ' + str(s)) countedX = OrderedDict(Counter(fvucounterS).most_common()) print("Following list (dict) is counted and is: " + str(countedX)) f = open('SavingfavusersdictcountedS.txt', 'w') f.write(str(countedX)) f.close() likesbyS = countedX.get(user_source) else: print('Did not find ' + str(user_target) + ' favorites.csv file in current folder...') downloadfavorites(user_target) #Reinforcement: Open target's favorites, find usernames column, save to an Ordered Dict, count and sort liked users by frequency of occurence in an ordered Dict with open('data/' + str(s) + ' favorites.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) fvucounterS = [] for row in csv_reader: fvucounterS.append(row[7]) print('Saving favorited users for ' + str(s) + ' in a dict') f = open('Savingfavusersdict2.txt', 'w') f.write(str(fvucounterS)) f.close() print(str(fvucounterS)) print('Counting frequency of likes for every user liked by ' + str(s)) countedX = OrderedDict(Counter(fvucounterS).most_common()) likesbyS = countedX.get(user_source) with open('results/' + 'First Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='a') as csv_writer: csv_writer = csv.writer(csv_writer, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) if not header_added1: csv_writer.writerow(['Target Username', '>','User 1','Times favorited by ' + str(user_target), 'Followed by ' + str(user_target) + '?', 'Times mentioned by ' + str(user_target), 'Reinforcement Rank', 'Reinforcement Rank Out of Number of Liked Users','Reinforcement Percentage','>','Source User', 'Times ' + 
str(user_source) + ' was favorited by User 1', 'Reinforcement Rank']) header_added1 = True #dont know if next line particularly counted[s] will work - goal is to find value for every specific key found / cross referenced - for reinforcement rank i need some kind of enumeration / enumerate thing that indexes every key or transforms it into an array then indexes it csv_writer.writerow([user_target, str(connectiontype).replace("[","").replace("]","").replace("'",""), str(s), str(targetuserinteractions.get(s)), followedby, str(targetusermentionscount.get(s)), str(list(targetuserinteractions.keys()).index(s)), str(list(targetuserinteractions.keys()).index(s)) + '/' + str(len(list(targetuserinteractions.keys()))), reinforcementpercent, follows_source, str(user_source), likesbyS]) if s not in targetuserinteractions: print('RESULT: None of the people followed by ' + str(user_target) + ' are following ' + str(user_source)) #Second Degree if int(skipsecond) == 2: print("Skipping 2nd Degree to 3rd...") if int(skipsecond) == 1: print('Now trying 2nd Degree...') for userone in targetuserinteractions: #if target user is mojojojo, targetuserinteractions is a dict of who mojojojo has favorited, followed, mentioned ordered by most favorited, t would be buttercup if int(skipsecond) == 2: if userone in targetuserinteractions: #- this line helps me skip to 3rd degree continue if os.path.exists('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV for row in csv_reader: alreadyinsecond.append(row[2]) if userone in alreadyinsecond: header_added2 = True print("Found " + str(userone) + " already in Second Degree CSV. 
Skipping...") continue reinforcementpercentage = list(targetuserinteractions.keys()).index(userone) / len(list(targetuserinteractions.keys())) reinforcementpercent = -(reinforcementpercentage - 1) connectiontype = [] if userone in targetuserfollowinglist: connectiontype.append('Follows') followedby = 'Yes' elif userone not in targetuserfollowinglist: followedby = 'No' if userone in targetuserfavoriteslist: connectiontype.append('Favorited') print("Found " + str(userone) + " in favorited users (targetuserfavoriteslist)") print(str(user_target) + " has liked " + str(userone) + ' ' + str(targetuserinteractions.get(userone)) + ' time(s).') if userone in targetusermentionscount: connectiontype.append("Mentioned") print("Found " + str(userone) + "in mentions (targetusermentionslist)") if reinforcementpercent > 0.97: likesbyS = "Pending" #downloadfavorites(userone) or check if it exists, then put all users in list and then count occurences - to show how much those users were liked by the top users followed by target user (bubbles) if reinforcementpercent < 0.97: likesbyS = 'Not counting. Reinforcement for ' + str(user_target) + ' too low.' if userone not in targetuserfavoriteslist: likesbyS = '0' #Download the number of 'following' from each user in the targetuserinteractions list (combined list of followed, favorited, or mentioned) ordered by number of favorites (of the target user (e.g. 
Mojo Jojo)) followingcount(userone) #Open the file of each user's downloaded following number/count if not os.path.exists('data/' + str(userone) + ' fcount.csv'): print("Didn't find " + str(userone)) notfound = [] notfound.append(userone) continue with open('data/' + str(userone) + ' fcount.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) fcounter = [] for row in csv_reader: fcounter.append(row[9]) print(int(fcounter[-1])) #if you find that the user is following more than 7,000 people, skip them in this loop if int(fcounter[-1]) > 7000: print("Following is greater than 10,000. Appending to list to use later but using only likes and mention frequencies") followingcounthigh = [] followingcounthigh.append(userone) f = open('Users skipped due to their following being greater than 7K for ' + str(user_target) + ' and ' + str(user_source) + '.txt', 'a') f.write('Users saved in list called: followingcounthigh\n' + str(userone)) f.close() continue if os.path.exists('data/' + str(userone) + " following.csv"): print('Found ' + str(userone) + " following.csv in current folder") print("Found following list for " + str(userone)) print("Going to compare with current number") #Open CSV that has the number of people the t user is following with open('data/' + str(userone) + ' fcount.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) fcounter = [] #make a list and inside it save the number of people the t user is following for row in csv_reader: fcounter.append(row[9]) print(int(fcounter[-1])) with open('data/' + str(userone) + ' following.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV row_count1 = sum(1 for row in csv_reader) #compare the number of rows (users - row_count) in the CSV with the number of current followers (from twint - frcounter) relatively (if they are within 10 followers) if abs(int(row_count1) - int(fcounter[-1])) <= 20 or 
int(row_count1) > int(fcounter[-1]): print('Following count is relatively the same (give or take 20) or row_count is larger') print('row_count from existing CSV is : ' + str(row_count1) + ' and fcounter is: ' + str(fcounter[-1])) #if the number of rows is not within 2 of the current number of followers, download a CSV of followers else: print('Following count not the same\n' + 'row_count1 is ' + str(row_count1) + '\nfcounter is ' + str(fcounter[-1])) print('Downloading following for ' + str(userone)) downloadfollowing(userone) else: downloadfollowing(userone) if not os.path.exists('data/' + str(userone) + ' following.csv'): print("Didn't find " + str(userone)) notfound = [] notfound.append(userone) f = open('Not found' + ' for ' + str(user_target) + ' and ' + str(user_source) + '.txt', 'a') f.write('Skipped due to no following or no account: ' + str(userone)) f.close() continue with open('data/' + str(userone) + ' following.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) utlist = [] for row in csv_reader: utlist.append(row[0]) #utlist is: Every user followed by user T e.g. bubbles (who's followed by the target user, e.g. buttercup) print('utlist is: ' + str(userone)) print('Saving following list for ' + str(userone) + ' in an array') for e in sourceuserfollowerslist: s = e #print(s) #if users following source user (e.g. person a) are in the t-user's following list, print the chain and export it in a txt document - later going to count account likes and export in csv\ connectiontype2 = [] if s in utlist: connectiontype2.append('Follows') if s in utlist: print('Found something...' 
+ '\nSaving non-reinforcement factored in list in txt and csv file') print(str(user_target) + ' is following ' + str(userone) + ' who is following ' + str(s) + ' who is following ' + str(user_source)) #test if second degree iterates with different numbers or if its necessary to even do that - more desirable result would be csv with all results f = open('seconddegree{0}.txt', 'a') f.write(str(user_target) + ' is following ' + str(userone) + ' who is following ' + str(s) + ' who is following ' + str(user_source) + '\n') f.close() timesuseronementionedusertwo = counttimesprevioususermentioneduser(userone, s) timesusertwomentionedsourceuser = counttimesprevioususermentioneduser(s, user_source) print("Put all elements in Ordered Dict with key being username and value being number of times user was mentioned") with open('results/' + 'Second Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='a') as csv_writer: csv_writer = csv.writer(csv_writer, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) if not header_added2: csv_writer.writerow(['Target Username', '>','User 1','Times favorited by ' + str(user_target), 'Followed by ' + str(user_target) + '?', 'Times mentioned by ' + str(user_target), 'Reinforcement Rank', 'Reinforcement Rank Out of Number of Liked Users','Reinforcement Percentage','>','User 2','>','Source User', 'Times ' + str(user_source) + ' was favorited by User 2', 'Reinforcement Rank', 'Times User 1 mentioned User 2', 'Times ' + str(user_source) + ' was mentioned by User 2']) #csv_writer.writerow(['Target Username', 'Follows','User 1','Follows','User 2', 'Follows', 'Source User']) header_added2 = True #csv_writer.writerow([user_target, '>', str(userone), '>', str(s), '>', str(user_source)]) csv_writer.writerow([user_target, str(connectiontype).replace("[","").replace("]","").replace("'",""), str(userone), str(targetuserinteractions.get(userone)), followedby, str(targetusermentionscount.get(userone)), 
str(list(targetuserinteractions.keys()).index(userone)), str(list(targetuserinteractions.keys()).index(userone)) + '/' + str(len(list(targetuserinteractions.keys()))), reinforcementpercent, str(connectiontype2).replace("[","").replace("]","").replace("'",""), str(s), 'Follows', str(user_source), 'Pending', 'Pending', str(timesuseronementionedusertwo), str(timesusertwomentionedsourceuser)]) #print('utlist is: ' + str(utlist)) #Have yet to count favorites for each link in the chain - need to update the list - could do it by just reading the CSV OR - at say six degrees, or the very end, find the top users, and only get the top selection of users (who have a reinforcement rank of 1.0 ), then count favorites for every link in the chain there #Now trying 3rd degree if skipthird == False: print('Now trying 3rd Degree...') #If target user is mojojojo, for every user in mojo jojo's favorited, mentioned, and followed users for userone in targetuserinteractions: if skipthird == True: if userone in targetuserinteractions: #- this line helps me skip to next degree continue if os.path.exists('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV columnofusers = [] for row in csv_reader: columnofusers.append(str(row[2])) for entry in columnofusers: if str(entry) == "User 1": columnofusers.remove(entry) if int(skiplastuser) == 1: # if it's 1, keep adding to the last user in User 1 list skiplastuser = 2 if str(columnofusers[len(columnofusers) -1]) == str(columnofusers[len(columnofusers) -2]): #if last element is the same as the element before that for userone in columnofusers[:-1]: continue elif str(columnofusers[len(columnofusers) -1]) != str(columnofusers[len(columnofusers) -2]): 
columnofusers.append(str(columnofusers[len(columnofusers) -1])) for u in columnofusers[:-1]: continue elif int(skiplastuser) == 2: # skip last user if userone in columnofusers: continue if checkifuserexists(userone) == False: continue #the function " checkifuserfollowedfavoritedmentionedbyprevioususer" takes 5 inputs and has 2 outputs. #Input 1: current username in loop #Input 2: Target user (Mojo Jojo) (or previous user) #Input 3: List of usernames mojo jojo is following (or previous user is following) #Input 4: List of usernames mojo jojo has favorited (or previous user has favorited) #Input 5: Dict of all usernames mojo jojo has interacted with #Output 1: Connection type (A list with: ['Follows', 'Favorited', 'Mentioned']) if those apply #Output 2: Followedby (A variable with either 'Yes', or 'No' to check if theyre followed by mojo jojo) connectiontypetargetusertouserone, followedby_targetusertouserone = checkifuserfollowedfavoritedmentionedbyprevioususer(userone, user_target, targetuserfollowinglist, targetuserfavoriteslist, targetuserinteractions) lastmentioned = lastdateprevioususermentioneduser(user_target, userone) lastfavorited = lastdateprevioususerfavoriteduser(user_target, userone) times_targetuser_mentioned_userone = counttimesprevioususermentioneduser(user_target, userone) print("Phase 1: Making following list for " + str(userone)) #Open user target following CSV file and save every row after header to an array #is it better to have this compressed or is it more useful to have them separate? #try: useronefollowinglist, useronefavorites, useronementionscount, useronementionslist, useroneinteractions, useroneinteractionsandfollowing, useronefavorites_ordereddict = generatefollowingfavoritementionsinteractionslists(userone) #except TypeError: ####useronefollowinglist = makefollowinglist(userone) #useronefollowinglist is who the t user (user followed by target user - e.g. Mojo Jojo) is following (e.g. 
who buttercup is following)) ####useronementionscount, useronementionslist = makementionslist(userone) #Now combine all user interactions in one list. E.g. if target user is mojojojo, this would be who buttercup is following, favorited, and mentioned print("Phase 2: Making interactions list for " + str(userone) + ". Counting favorited users.") ####useronefavorites = makefavoriteslist(userone) #useroneinteractions should be named "useroneinteractions" ####useroneinteractions, useroneinteractionsandfollowing = makeinteractionslists(userone, useronefollowinglist, useronefavorites, useronementionslist) for usertwo in useroneinteractions: #if target user is mojojojo, useroneinteractions would be dict of who buttercup has followed, favorited and mentioned ordered by number of favorites, usertwo would be bubbles #lastfavorited2 = lastdateprevioususerfavoriteduser(t, usertwo) mentionedadded = False connectiontypeuseronetousertwo = [] usertwofollowingcount = getfollowingcount(usertwo) if skipuseriffollowingtoomanyandmentionslow(userone, usertwo, usertwofollowingcount) == False: continue connectiontypeuseronetousertwo, followedby_useronetousertwo = checkifuserfollowedfavoritedmentionedbyprevioususer(usertwo, userone, useronefollowinglist, useronefavorites, useroneinteractions) downloadfollowingifnotthere(usertwo) if not os.path.exists('data/' + str(usertwo) + " following.csv"): print('Did not find ' + str(usertwo) + ' following.csv file in data folder...') if skipifuserisprivate(usertwo) == True: continue usertwofollowingcount = getfollowingcount(usertwo) #if you find that the user is following more than 7,000 people, skip them in this loop if skipuseriffollowingtoomanyandmentionslow(userone, usertwo, usertwofollowingcount) == False: continue #continue if skipif_followinglist_doesntexist(usertwo) == True: continue usertwofollowinglist = makefollowinglist(usertwo) #usertwofollowinglist would be who bubbles (not buttercup) is following for userthree in usertwofollowinglist: 
#for every user in bubbles's following list (if the order is: mojojojo > buttercup (userone) > bubbles (x) > userthree) #if str(userthree) == "barackobama": #continue if userthree not in sourceuserfollowerslist: #if that user isn't being followed by the source user (e.g. person a) #print("Did not find " + str(userthree) + " in " + str(user_source) + " followers\n") #print("Skipping...\n") continue if userthree in sourceuserfollowerslist: #if you find one of those users (that bubbles is following) in person a's followers list e.g. ((if the order is: mojojojo > buttercup (userone) > bubbles (usertwo) > userthree > person a) useroneslotinchain = [] usertwoslotinchain = [] userthreeslotinchain = [] countof3users = 0 if os.path.exists('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV for row in csv_reader: useroneslotinchain.append(row[2]) usertwoslotinchain.append(row[12]) userthreeslotinchain.append(row[22]) if userone in useroneslotinchain: #alreadyinthird.append(userone) countof3users = 1 if usertwo in usertwoslotinchain: #alreadyinthird.append(x) countof3users = countof3users + 1 if userthree in userthreeslotinchain: #alreadyinthird.append(xf) countof3users = countof3users + 1 #if alreadyinthird == [t, x, xf]: if countof3users == 3: continue print("Found " + str(userthree) + " in " + str(user_source) + " followers") countofuseronementioningtargetuser = counttimesprevioususermentioneduser(userone, user_target) lastmentioned_useronetotargetuser = lastdateprevioususermentioneduser(userone, user_target) targetuser_userone_interaction_score = get_weighted_interactions_score_light(user_target, userone, times_targetuser_mentioned_userone, countofuseronementioningtargetuser, lastmentioned, 
lastmentioned_useronetotargetuser) countofuseronementioningusertwo = counttimesprevioususermentioneduser(userone, usertwo) if countofuseronementioningusertwo != str(0): if 'Mentioned' not in connectiontypeuseronetousertwo: connectiontypeuseronetousertwo.append('Mentioned') lastmentioned_useronetousertwo = lastdateprevioususermentioneduser(userone, usertwo) lastfavorited_useronetousertwo = lastdateprevioususerfavoriteduser(userone, usertwo) countofusertwomentioninguserone = counttimesprevioususermentioneduser(usertwo, userone) lastmentioned_usertwotouserone = lastdateprevioususermentioneduser(usertwo, userone) userone_usertwo_interaction_score = get_weighted_interactions_score_light(userone, usertwo, countofuseronementioningusertwo, countofusertwomentioninguserone, lastmentioned_useronetousertwo, lastmentioned_usertwotouserone) #if int(countofuseronementioningusertwo) > 0 and mentionedadded == False: #connectiontypeuseronetousertwo.append("Mentioned") #NEW ADDED #mentionedadded = True countofusertwomentioninguserthree = counttimesprevioususermentioneduser(usertwo, userthree) lastmentioned_usertwotouserthree = lastdateprevioususermentioneduser(usertwo, userthree) countofuserthreementioningusertwo = counttimesprevioususermentioneduser(userthree, usertwo) lastmentioned_userthreetousertwo = lastdateprevioususermentioneduser(userthree, usertwo) #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #usertwo_userthree_interaction_score = interactionscore(countofusertwomentioninguserthree, countofuserthreementioningusertwo) #usertwo_userthree_interaction_score = get_weighted_interactions_score(usertwo, userthree) usertwo_userthree_interaction_score = get_weighted_interactions_score_light(usertwo, userthree, countofusertwomentioninguserthree, countofuserthreementioningusertwo, lastmentioned_usertwotouserthree, lastmentioned_userthreetousertwo) #tweets by evolvesustain mentioning person a) 
#countofuserthreementioningsourceuser countofuserthreementioningsourceuser = counttimesprevioususermentioneduser(userthree, user_source) lastmentioned_userthreetosourceuser = lastdateprevioususermentioneduser(userthree, user_source) countofsourceusermentioninguserthree = counttimesprevioususermentioneduser(user_source, userthree) lastmentioned_sourceusertouserthree = lastdateprevioususermentioneduser(user_source, userthree) #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #userfour_sourceuser_interaction_score = interactionscore(countofuserfourmentioningsourceuser, countofsourceusermentioninguserfour) userthree_sourceuser_interaction_score = get_weighted_interactions_score_light(userthree, user_source, countofuserthreementioningsourceuser, countofsourceusermentioninguserthree, lastmentioned_userthreetosourceuser, lastmentioned_sourceusertouserthree) chaininteraction_mention_score = float(targetuser_userone_interaction_score) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score) * (float(userthree_sourceuser_interaction_score)))) chaininteraction_mention_score_without_sourceuser = float(targetuser_userone_interaction_score) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score))) chaininteraction_mention_score_added = float(targetuser_userone_interaction_score) + float(userone_usertwo_interaction_score) + float(usertwo_userthree_interaction_score) + float(userthree_sourceuser_interaction_score) #testing these chain interaction scores: reinforcementpercentxrecency_targetuser_userone = getreinforcementpercentxrecency(user_target, userone, getreinforcementpercentage(userone, targetuserfavorites_ordereddict)) reinforcementpercentxrecency_userone_usertwo = getreinforcementpercentxrecency(userone, usertwo, getreinforcementpercentage(usertwo, useronefavorites_ordereddict)) RRImultiplicativechainscore_without_sourceuser = 
float(reinforcementpercentxrecency_targetuser_userone) * (float(reinforcementpercentxrecency_userone_usertwo) * (float(usertwo_userthree_interaction_score))) RRIImultiplicativechainscore = float(reinforcementpercentxrecency_targetuser_userone) * (float(reinforcementpercentxrecency_userone_usertwo) * (float(usertwo_userthree_interaction_score) * (float(userthree_sourceuser_interaction_score)))) RIImultiplicativechainscore_without_sourceuser = float(reinforcementpercentxrecency_targetuser_userone) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score))) RIIImultiplicativechainscore = float(reinforcementpercentxrecency_targetuser_userone) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score) * (float(userthree_sourceuser_interaction_score)))) with open('results/' + 'Third Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='a') as csv_writer: csv_writer = csv.writer(csv_writer, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) if not header_added3: csv_writer.writerow(['Target Username', '>','User 1','Times favorited by ' + str(user_target), 'Last recorded favorite by ' + str(user_target), 'Followed by ' + str(user_target) + '?', 'Times mentioned by ' + str(user_target), 'Last recorded mention by ' + str(user_target), 'Reinforcement Rank', 'Reinforcement Rank Out of Number of Interacted Users','Reinforcement Percentage','>','User 2','Times favorited by User 1','Last recorded favorite by User 1','Followed by User 1?','Times mentioned by User 1','Last mentioned by User 1','Reinforcement Rank','Reinforcement Rank Out of Number of Interacted Users','Reinforcement Percentage','>','User 3','>','Source User', 'Times ' + str(user_target) + ' mentioned User 1', 'Last recorded mention of User 1 by ' + str(user_target),'Times User 1 mentioned ' + str(user_target), 'Last recorded mention of ' + str(user_target) + ' by User 1','Interaction score for ' + str(user_target) + ' and 
User 1','Times User 1 mentioned User 2', 'Last recorded mention of User 2 by User 1', 'Times User 2 mentioned User 1', 'Last recorded mention of User 1 by User 2', 'Interaction Score for User 1 and User 2','Times User 2 mentioned User 3', 'Last recorded mention of User 3 by User 2','Times User 3 mentioned User 2','Last recorded mention of User 3 by User 2', 'Interaction Score for User 2 and User 3','Times ' + str(user_source) + ' was mentioned by User 3','Last recorded mention of ' + str(user_source) + ' by User 3','Times ' + str(user_source) + ' mentioned User 3','Last recorded mention of User 3 by ' + str(user_source),'Interaction Score for User 3 and ' + str(user_source), 'Chain Interaction Score', 'Chain Interaction Score (Without User 3 and ' + str(user_source) + ')', 'Additive Chain Interaction Score', 'RRI Multiplicative Chain Score (without ' + str(user_source) + ')', 'RRII Multiplicative Chain Score', 'RII Multiplicative Chain Score (without ' + str(user_source) + ')', 'RIII Multiplicative Chain Score']) #csv_writer.writerow(['Target Username', 'Follows','User 1','Follows','User 2', 'Follows', 'Source User']) header_added3 = True #csv_writer.writerow([user_target, '>', str(userone), '>', str(s), '>', str(user_source)]) csv_writer.writerow([user_target, str(connectiontypetargetusertouserone).replace("[","").replace("]","").replace("'",""), str(userone), str(targetuserinteractions.get(userone)), lastfavorited, followedby_targetusertouserone, times_targetuser_mentioned_userone, lastmentioned, str(list(targetuserinteractions.keys()).index(userone)), str(list(targetuserinteractions.keys()).index(userone)) + '/' + str(len(list(targetuserinteractions.keys()))), getreinforcementpercentage(userone, targetuserfavorites_ordereddict), str(connectiontypeuseronetousertwo).replace("[","").replace("]","").replace("'",""), str(usertwo), str(useroneinteractions.get(usertwo)), lastfavorited_useronetousertwo, followedby_useronetousertwo, str(countofuseronementioningusertwo), 
lastmentioned_useronetousertwo, str(list(useroneinteractions.keys()).index(usertwo)), str(list(useroneinteractions.keys()).index(usertwo)) + '/' + str(len(list(useroneinteractions.keys()))), getreinforcementpercentage(usertwo, useronefavorites_ordereddict), 'Follows', str(userthree),'Follows',str(user_source), times_targetuser_mentioned_userone, lastmentioned, countofuseronementioningtargetuser, lastmentioned_useronetotargetuser, format(targetuser_userone_interaction_score, 'f'), str(countofuseronementioningusertwo), lastmentioned_useronetousertwo, countofusertwomentioninguserone, lastmentioned_usertwotouserone, format(userone_usertwo_interaction_score, 'f'),str(countofusertwomentioninguserthree), lastmentioned_usertwotouserthree, countofuserthreementioningusertwo, lastmentioned_userthreetousertwo, usertwo_userthree_interaction_score,str(countofuserthreementioningsourceuser), lastmentioned_userthreetosourceuser, countofsourceusermentioninguserthree, lastmentioned_sourceusertouserthree, userthree_sourceuser_interaction_score, chaininteraction_mention_score, chaininteraction_mention_score_without_sourceuser, chaininteraction_mention_score_added, RRImultiplicativechainscore_without_sourceuser, RRIImultiplicativechainscore, RIImultiplicativechainscore_without_sourceuser, RIIImultiplicativechainscore]) #print('utlist is: ' + str(utlist)) print('Now trying 4th Degree...') #If target user is mojo jojo, for every user in mojo jojo's favorited, mentioned, and followed users for userone in targetuserinteractions: if skipfourth == True: if userone in targetuserinteractions: #- this line helps me skip to next degree continue if os.path.exists('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV columnofusers_userone = [] 
columnofusers_usertwo = [] #skiplastusertwo_fourthdegree = False for row in csv_reader: columnofusers_userone.append(str(row[2])) for entry in columnofusers_userone: if str(entry) == "User 1": columnofusers_userone.remove(entry) if int(skiplastuser_fourthdegree) == 1: # if it's 1, keep adding to the last user in User 1 list skiplastusertwo_fourthdegree = False skiplastuser_fourthdegree = 2 if str(columnofusers_userone[len(columnofusers_userone) -1]) == str(columnofusers_userone[len(columnofusers_userone) -2]): #if last element is the same as the element before that for userone in columnofusers_userone[:-1]: continue elif str(columnofusers_userone[len(columnofusers_userone) -1]) != str(columnofusers_userone[len(columnofusers_userone) -2]): columnofusers_userone.append(str(columnofusers_userone[len(columnofusers_userone) -1])) for userone in columnofusers_userone[:-1]: continue elif int(skiplastuser_fourthdegree) == 2: # skip last user 1 #skiplastusertwo_fourthdegree = True skiplastusertwo_fourthdegree = False if userone in columnofusers_userone: continue elif int(skiplastuser_fourthdegree) == 3: #dont skip last user 2 in fourth degree skiplastusertwo_fourthdegree = False elif int(skiplastuser_fourthdegree) == 4: # skip last user 2 in fourth degree skiplastusertwo_fourthdegree = True #elif skiplastuser_fourthdegree == 5: #skip last #the function " checkifuserfollowedfavoritedmentionedbyprevioususer" takes 5 inputs and has 2 outputs. 
#Input 1: current username in loop #Input 2: Target user (Mojo Jojo) (or previous user) #Input 3: List of usernames mojo jojo is following (or previous user is following) #Input 4: List of usernames mojo jojo has favorited (or previous user has favorited) #Input 5: Dict of all usernames mojo jojo has interacted with #Output 1: Connection type (A list with: ['Follows', 'Favorited', 'Mentioned']) if those apply #Output 2: Followedby (A variable with either 'Yes', or 'No' to check if theyre followed by mojo jojo) connectiontypetargetusertouserone, followedby_targetusertouserone = checkifuserfollowedfavoritedmentionedbyprevioususer(userone, user_target, targetuserfollowinglist, targetuserfavoriteslist, targetuserinteractions) lastmentioned = lastdateprevioususermentioneduser(user_target, userone) lastfavorited = lastdateprevioususerfavoriteduser(user_target, userone) times_targetuser_mentioned_userone = counttimesprevioususermentioneduser(user_target, userone) print("Phase 1: Making following list for " + str(userone)) #Open user target following CSV file and save every row after header to an array useronefollowinglist, useronefavorites, useronementionscount, useronementionslist, useroneinteractions, useroneinteractionsandfollowing, useronefavorites_ordereddict = generatefollowingfavoritementionsinteractionslists(userone) ####useronefollowinglist = makefollowinglist(userone) #useronefollowinglist is who the t user (user followed by target user - e.g. Mojo Jojo) is following (e.g. who buttercup is following)) ####useronementionscount, useronementionslist = makementionslist(userone) #Now combine all user interactions in one list. E.g. if target user is mojojojo, this would be who buttercup is following, favorited, and mentioned print("Phase 2: Making interactions list for " + str(userone) + ". 
Counting favorited users.") ####useronefavorites = makefavoriteslist(userone) #useroneinteractions should be named "useroneinteractions" ####useroneinteractions, useroneinteractionsandfollowing = makeinteractionslists(userone, useronefollowinglist, useronefavorites, useronementionslist) #if user_target.strip() in useroneinteractions: #del useroneinteractions[user_target.strip()] #useroneinteractions.remove(user_target.strip()) for usertwo in useroneinteractions: #if target user is mojojojo, useroneinteractions would be dict of who buttercup has followed, favorited and mentioned ordered by number of favorites, usertwo would be bubbles #lastfavorited2 = lastdateprevioususerfavoriteduser(t, usertwo) #if skiplastusertwo_fourthdegree == True: #if usertwo.strip() == user_target.strip(): #print('sioll here') #raise ValueError("shit") #print('usertwostrip', usertwo.strip()) #print('usertargetstrip', user_target.strip()) #file_usertwo = open('usertwostrip.txt', 'a') #file_usertwo.write('usertwostrip ' + usertwo.strip()) #file_usertwo.write('usertargetstrip ' + user_target.strip()) #file_usertwo.close() #continue if os.path.exists('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV #columnofusers_userone = [] columnofusers_usertwo = [] for row in csv_reader: columnofusers_usertwo.append(str(row[12])) for entry in columnofusers_usertwo: if str(entry) == "User 2": columnofusers_usertwo.remove(entry) #if skiplastuser_fourthdegree == 1 or skiplastuser_fourthdegree == 3 or skiplastuser_fourthdegree == 4: if skiplastusertwo_fourthdegree == False: # if it's False, keep adding to the last user in User 2 list skiplastusertwo_fourthdegree = True #skiplastuser_fourthdegree = 2 if str(columnofusers_usertwo[len(columnofusers_usertwo) 
-1]) == str(columnofusers_usertwo[len(columnofusers_usertwo) -2]): #if last element is the same as the element before that for usertwo in columnofusers_usertwo[:-1]: continue elif str(columnofusers_usertwo[len(columnofusers_usertwo) -1]) != str(columnofusers_usertwo[len(columnofusers_usertwo) -2]): columnofusers_usertwo.append(str(columnofusers_usertwo[len(columnofusers_usertwo) -1])) for usertwo in columnofusers_usertwo[:-1]: continue elif skiplastusertwo_fourthdegree == True: # skip last user 2 if userone in columnofusers_userone[:-1]: #not sure about this line, we'll see how it goes - the goal of this line is to only remove bubbles FOR buttercup but not for other user 1 instances if usertwo in columnofusers_usertwo[:-1]: continue connectiontypeuseronetousertwo, followedby_useronetousertwo = checkifuserfollowedfavoritedmentionedbyprevioususer(usertwo, userone, useronefollowinglist, useronefavorites, useroneinteractions) downloadfollowingifnotthere(usertwo) if not os.path.exists('data/' + str(usertwo) + " following.csv"): print('Did not find ' + str(usertwo) + ' following.csv file in data folder...') if skipifuserisprivate(usertwo) == True: continue usertwofollowingcount = getfollowingcount(usertwo) #if you find that the user is following more than 7,000 people, skip them in this loop if skipuseriffollowingtoomanyandmentionslow(userone, usertwo, usertwofollowingcount) == False: continue #continue #download who that user is following (bubbles) #Open the file of who bubbles is following if order is: mojojojo > buttercup > bubbles if skipif_followinglist_doesntexist(usertwo) == True: continue #usertwofollowinglist = makefollowinglist(usertwo) #usertwofollowinglist would be who bubbles (not buttercup) is following usertwofollowinglist, usertwofavorites, usertwomentionscount, usertwomentionslist, usertwointeractions, usertwointeractionsandfollowing, usertwofavorites_ordereddict = generatefollowingfavoritementionsinteractionslists(usertwo) for userthree in 
usertwointeractions: #for every user in bubbles's following list (if the order is: mojojojo > buttercup (userone) > bubbles (x) > userthree) #if str(userthree) == "barackobama": #continue userthreefollowingcount = getfollowingcount(userthree) if userthreefollowingcount == None: continue if skipuseriffollowingtoomanyandmentionslow(usertwo, userthree, userthreefollowingcount) == False: continue connectiontypeusertwotouserthree, followedby_usertwotouserthree = checkifuserfollowedfavoritedmentionedbyprevioususer(userthree, usertwo, usertwofollowinglist, usertwofavorites, usertwointeractions) userthreefollowinglist = makefollowinglist(userthree) #SKIPPERS / BOUNCERS downloadfollowingifnotthere(userthree) if not os.path.exists('data/' + str(userthree) + " following.csv"): print('Did not find ' + str(userthree) + ' following.csv file in data folder...') if skipifuserisprivate(userthree) == True: continue if skipuseriffollowingtoomanyandmentionslow(usertwo, userthree, userthreefollowingcount) == False: continue if skipif_followinglist_doesntexist(userthree) == True: continue if userthreefollowinglist == None: continue for userfour in userthreefollowinglist: #if skipuseriffollowingtoomanyandmentionslow(userthree, userfour, user) if userfour not in sourceuserfollowerslist: #if that user isn't being followed by the source user (e.g. person a) #print("Did not find " + str(userthree) + " in " + str(user_source) + " followers\n") #print("Skipping...\n") continue if userfour in sourceuserfollowerslist: #if you find one of those users (that bubbles is following) in person a's followers list e.g. 
((if the order is: mojojojo > buttercup (userone) > bubbles (usertwo) > userthree > person a) useroneslotinchain = [] usertwoslotinchain = [] userthreeslotinchain = [] userfourslotinchain = [] countof4users = 0 if os.path.exists('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV for row in csv_reader: useroneslotinchain.append(row[2]) usertwoslotinchain.append(row[12]) userthreeslotinchain.append(row[22]) userfourslotinchain.append(row[32]) #find row by counting current rows, also adjust row numbers based on new data if userone in useroneslotinchain: #alreadyinthird.append(userone) countof4users = 1 if usertwo in usertwoslotinchain: #alreadyinthird.append(x) countof4users = countof4users + 1 if userthree in userthreeslotinchain: #alreadyinthird.append(xf) countof4users = countof4users + 1 #if alreadyinthird == [t, x, xf]: if userfour in userfourslotinchain: countof4users = countof4users + 1 if countof4users == 4: continue #SKIPPING USERS WITH ZERO LIKES AND NOT BEING FOLLOWED #if int(useroneinteractions.get(usertwo)) <= 0 and followcheck == False: #SKIPPING USERS WITH ZERO LIKES AND NOT BEING FOLLOWED #print("Likes by" + str(userone) + " for " + str(usertwo) + " are zero. And not being followed by " + str(userone) + " Skipping...") #continue #SKIPPING USERS WITH ZERO LIKES AND NOT BEING FOLLOWED print("Found " + str(userfour) + " in " + str(user_source) + " followers") #tweets by buttercup mentioning bubbles? perhaps I could get that number from the tmentionscount? 
benefit of this approach is it gives me a file with the mentions #countofuseronementioningusertwo #countofusertwomentioninguserthree countofuseronementioningtargetuser = counttimesprevioususermentioneduser(userone, user_target) lastmentioned_useronetotargetuser = lastdateprevioususermentioneduser(userone, user_target) targetuser_userone_interaction_score = get_weighted_interactions_score_light(user_target, userone, times_targetuser_mentioned_userone, countofuseronementioningtargetuser, lastmentioned, lastmentioned_useronetotargetuser) #times and dates of when user 1 mentioned user 2 countofuseronementioningusertwo = counttimesprevioususermentioneduser(userone, usertwo) lastmentioned_useronetousertwo = lastdateprevioususermentioneduser(userone, usertwo) lastfavorited_useronetousertwo = lastdateprevioususerfavoriteduser(userone, usertwo) #times and dates of when user 2 mentioned user 1 countofusertwomentioninguserone = counttimesprevioususermentioneduser(usertwo, userone) lastmentioned_usertwotouserone = lastdateprevioususermentioneduser(usertwo, userone) #if int(countofuseronementioningusertwo) > 0 and mentionedadded == False: #connectiontypeuseronetousertwo.append("Mentioned") #NEW ADDED #mentionedadded = True #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #userone_usertwo_interaction_score = interactionscore(countofuseronementioningusertwo, countofusertwomentioninguserone) #userone_usertwo_interaction_score = get_weighted_interactions_score(userone, usertwo) userone_usertwo_interaction_score = get_weighted_interactions_score_light(userone, usertwo, countofuseronementioningusertwo, countofusertwomentioninguserone, lastmentioned_useronetousertwo, lastmentioned_usertwotouserone) countofusertwomentioninguserthree = counttimesprevioususermentioneduser(usertwo, userthree) lastmentioned_usertwotouserthree = lastdateprevioususermentioneduser(usertwo, userthree) lastfavorited_usertwotouserthree = 
lastdateprevioususerfavoriteduser(usertwo, userthree) #times and dates of when user 3 mentioned user 2 countofuserthreementioningusertwo = counttimesprevioususermentioneduser(userthree, usertwo) lastmentioned_userthreetousertwo = lastdateprevioususermentioneduser(userthree, usertwo) #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #usertwo_userthree_interaction_score = interactionscore(countofusertwomentioninguserthree, countofuserthreementioningusertwo) #usertwo_userthree_interaction_score = get_weighted_interactions_score(usertwo, userthree) usertwo_userthree_interaction_score = get_weighted_interactions_score_light(usertwo, userthree, countofusertwomentioninguserthree, countofuserthreementioningusertwo, lastmentioned_usertwotouserthree, lastmentioned_userthreetousertwo) #times and dates of when user 3 mentioned user 4 countofuserthreementioninguserfour = counttimesprevioususermentioneduser(userthree, userfour) lastmentioned_userthreetouserfour = lastdateprevioususermentioneduser(userthree, userfour) #times and dates of when user 4 mentioned user 3 countofuserfourmentioninguserthree = counttimesprevioususermentioneduser(userfour, userthree) lastmentioned_userfourtouserthree = lastdateprevioususermentioneduser(userfour, userthree) #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #userthree_userfour_interaction_score = interactionscore(countofuserthreementioninguserfour, countofuserfourmentioninguserthree) #userthree_userfour_interaction_score = get_weighted_interactions_score(userthree, userfour) userthree_userfour_interaction_score = get_weighted_interactions_score_light(userthree, userfour, countofuserthreementioninguserfour, countofuserfourmentioninguserthree, lastmentioned_userthreetouserfour, lastmentioned_userfourtouserthree) countofuserfourmentioningsourceuser = counttimesprevioususermentioneduser(userfour, user_source) 
lastmentioned_userfourtosourceuser = lastdateprevioususermentioneduser(userfour, user_source) #times and dates of when source user mentioned user 4 countofsourceusermentioninguserfour = counttimesprevioususermentioneduser(user_source, userfour) lastmentioned_sourceusertouserfour = lastdateprevioususermentioneduser(user_source, userfour) #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #userfour_sourceuser_interaction_score = interactionscore(countofuserfourmentioningsourceuser, countofsourceusermentioninguserfour) userfour_sourceuser_interaction_score = get_weighted_interactions_score_light(userfour, user_source, countofuserfourmentioningsourceuser, countofsourceusermentioninguserfour, lastmentioned_userfourtosourceuser, lastmentioned_sourceusertouserfour) #Chain scores chaininteraction_mention_score = float(targetuser_userone_interaction_score) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score) * (float(userthree_userfour_interaction_score) * (float(userfour_sourceuser_interaction_score))))) chaininteraction_mention_score_added = float(targetuser_userone_interaction_score) + float(userone_usertwo_interaction_score) + float(usertwo_userthree_interaction_score) + float(userthree_userfour_interaction_score) + float(userfour_sourceuser_interaction_score) chaininteraction_mention_score_without_sourceuser = float(targetuser_userone_interaction_score) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score) * (float(userthree_userfour_interaction_score)))) #Reinforcement percentage multiplied by recency score reinforcementpercentxrecency_targetuser_userone = getreinforcementpercentxrecency(user_target, userone, getreinforcementpercentage(userone, targetuserfavorites_ordereddict)) reinforcementpercentxrecency_userone_usertwo = getreinforcementpercentxrecency(userone, usertwo, getreinforcementpercentage(usertwo, 
useronefavorites_ordereddict)) reinforcementpercentxrecency_usertwo_userthree = getreinforcementpercentxrecency(usertwo, userthree, getreinforcementpercentage(userthree, usertwofavorites_ordereddict)) #Reinforcement Chain scores (Reinforcement til no more reinforcement scores) RRRImultiplicativechainscore_without_sourceuser = float(reinforcementpercentxrecency_targetuser_userone) * (float(reinforcementpercentxrecency_userone_usertwo) * (float(reinforcementpercentxrecency_usertwo_userthree) * (float(userthree_userfour_interaction_score)))) RRRIImultiplicativechainscore = float(reinforcementpercentxrecency_targetuser_userone) * (float(reinforcementpercentxrecency_userone_usertwo) * (float(reinforcementpercentxrecency_usertwo_userthree) * (float(userthree_userfour_interaction_score)) * (float(userfour_sourceuser_interaction_score)))) #Reinforcement Percentage of Target User influencing rest of chain RIIImultiplicativechainscore_without_sourceuser = float(reinforcementpercentxrecency_targetuser_userone) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score) * (float(userthree_userfour_interaction_score)))) R4Imultiplicativechainscore = float(reinforcementpercentxrecency_targetuser_userone) * (float(userone_usertwo_interaction_score) * (float(usertwo_userthree_interaction_score) * (float(userthree_userfour_interaction_score) * (float(userfour_sourceuser_interaction_score))))) with open('results/' + 'Fourth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='a') as csv_writer: csv_writer = csv.writer(csv_writer, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) if not header_added4: csv_writer.writerow(['Target Username', '>','User 1','Times favorited by ' + str(user_target), 'Last recorded favorite by ' + str(user_target), 'Followed by ' + str(user_target) + '?', 'Times mentioned by ' + str(user_target), 'Last recorded mention by ' + str(user_target), 'Reinforcement Rank', 'Reinforcement Rank Out of 
Number of Interacted Users','Reinforcement Percentage','>','User 2','Times favorited by User 1','Last recorded favorite by User 1','Followed by User 1?','Times mentioned by User 1','Last mentioned by User 1','Reinforcement Rank','Reinforcement Rank Out of Number of Interacted Users','Reinforcement Percentage','>','User 3','Times favorited by User 2','Last recorded favorite by User 2','Followed by User 2?','Times mentioned by User 2','Last mentioned by User 2','Reinforcement Rank','Reinforcement Rank Out of Number of Interacted Users','Reinforcement Percentage','>','User 4','>','Source User', 'Times ' + str(user_target) + ' mentioned User 1', 'Last recorded mention of User 1 by ' + str(user_target), 'Times User 1 mentioned ' + str(user_target), 'Last recorded mention of ' + str(user_target) + ' by User 1', 'Interaction score for ' + str(user_target) + ' and User 1', 'Times User 1 mentioned User 2', 'Last recorded mention of User 2 by User 1','Times User 2 mentioned User 1', 'Last recorded mention of User 1 by User 2', 'Interaction Score for User 1 and User 2', 'Times User 2 mentioned User 3', 'Last recorded mention of User 3 by User 2','Times User 3 mentioned User 2','Last recorded mention of User 2 by User 3','Interaction Score for User 2 and User 3','Times User 3 mentioned User 4', 'Last recorded mention of User 3 by User 4', 'Times User 4 mentioned User 3','Last recorded mention of User 4 by User 3', 'Interaction Score for User 3 and User 4','Times ' + str(user_source) + ' was mentioned by User 4','Last recorded mention of ' + str(user_source) + ' by User 4', 'Times ' + str(user_source) + ' mentioned User 4', 'Last recorded mention of User 4 by ' + str(user_source),'Interaction Score for User 4 and ' + str(user_source), 'Chain Interaction Score', 'Chain Interaction Score (Without User 4 and ' + str(user_source) + ')', 'Additive Chain Interaction Score', 'RRRI Multiplicative Chain Score (without ' + str(user_source) + ')', 'RRRII Multiplicative Chain Score', 'RIII 
Multiplicative Chain Score (without ' + str(user_source) + ')', 'R-4I Multiplicative Chain Score']) #currently working #csv_writer.writerow(['Target Username', 'Follows','User 1','Follows','User 2', 'Follows', 'Source User']) header_added4 = True #dont know if next line particularly counted[s] will work - goal is to find value for every specific key found / cross referenced - for reinforcement rank i need some kind of enumeration / enumerate thing that indexes every key or transforms it into an array then indexes it #csv_writer.writerow([user_target, '>', str(userone), '>', str(s), '>', str(user_source)]) csv_writer.writerow([user_target, str(connectiontypetargetusertouserone).replace("[","").replace("]","").replace("'",""), str(userone), str(targetuserinteractions.get(userone)), lastfavorited, followedby_targetusertouserone, times_targetuser_mentioned_userone, lastmentioned, str(list(targetuserinteractions.keys()).index(userone)), str(list(targetuserinteractions.keys()).index(userone)) + '/' + str(len(list(targetuserinteractions.keys()))), getreinforcementpercentage(userone, targetuserfavorites_ordereddict), str(connectiontypeuseronetousertwo).replace("[","").replace("]","").replace("'",""), str(usertwo), str(useroneinteractions.get(usertwo)), lastfavorited_useronetousertwo, followedby_useronetousertwo, str(countofuseronementioningusertwo), lastmentioned_useronetousertwo, str(list(useroneinteractions.keys()).index(usertwo)), str(list(useroneinteractions.keys()).index(usertwo)) + '/' + str(len(list(useroneinteractions.keys()))),getreinforcementpercentage(usertwo, useronefavorites_ordereddict),str(connectiontypeusertwotouserthree).replace("[","").replace("]","").replace("'",""),str(userthree), str(usertwointeractions.get(userthree)),lastfavorited_usertwotouserthree,followedby_usertwotouserthree, str(countofusertwomentioninguserthree), lastmentioned_usertwotouserthree, str(list(usertwointeractions.keys()).index(userthree)), 
str(list(usertwointeractions.keys()).index(userthree)) + '/' + str(len(list(usertwointeractions.keys()))), getreinforcementpercentage(userthree, usertwofavorites_ordereddict), 'Follows',str(userfour),'Follows',str(user_source), times_targetuser_mentioned_userone, lastmentioned, countofuseronementioningtargetuser, lastmentioned_useronetotargetuser, format(targetuser_userone_interaction_score, 'f'), str(countofuseronementioningusertwo), lastmentioned_useronetousertwo, str(countofusertwomentioninguserone), lastmentioned_usertwotouserone,str(format(userone_usertwo_interaction_score, 'f')),str(countofusertwomentioninguserthree),lastmentioned_usertwotouserthree,str(countofuserthreementioningusertwo), lastmentioned_userthreetousertwo, format(usertwo_userthree_interaction_score, 'f'),str(countofuserthreementioninguserfour), lastmentioned_userthreetouserfour,str(countofuserfourmentioninguserthree), lastmentioned_userfourtouserthree, str(format(userthree_userfour_interaction_score, 'f')), str(countofuserfourmentioningsourceuser),lastmentioned_userfourtosourceuser, str(countofsourceusermentioninguserfour), lastmentioned_sourceusertouserfour, format(userfour_sourceuser_interaction_score, 'f'), chaininteraction_mention_score, chaininteraction_mention_score_without_sourceuser, chaininteraction_mention_score_added, RRRImultiplicativechainscore_without_sourceuser, RRRIImultiplicativechainscore, RIIImultiplicativechainscore_without_sourceuser, R4Imultiplicativechainscore]) print('Now trying 5th Degree...') #If target user is mojojojo, for every user in mojo jojo's favorited, mentioned, and followed users for userone in targetuserinteractions: if os.path.exists('results/' + 'Fifth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'): with open('results/' + 'Fifth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', 'r') as f: csv_reader = csv.reader(f, delimiter=',') next(csv_reader, None) #count the number of rows in the CSV columnofusers_userone 
= [] columnofusers_usertwo = [] for row in csv_reader: columnofusers_userone.append(str(row[2])) for entry in columnofusers_userone: if str(entry) == "User 1": columnofusers_userone.remove(entry) if int(skiplastuser_fifthdegree) == 1: # if it's 1, keep adding to the last user in User 1 list skiplastusertwo_fifthdegree = False skiplastuserthree_fifthdegree = False skiplastuser_fifthdegree = 2 if str(columnofusers_userone[len(columnofusers_userone) -1]) == str(columnofusers_userone[len(columnofusers_userone) -2]): #if last element is the same as the element before that for userone in columnofusers_userone[:-1]: continue elif str(columnofusers_userone[len(columnofusers_userone) -1]) != str(columnofusers_userone[len(columnofusers_userone) -2]): columnofusers_userone.append(str(columnofusers_userone[len(columnofusers_userone) -1])) for userone in columnofusers_userone[:-1]: continue elif int(skiplastuser_fifthdegree) == 2: # skip last user 1 if userone in columnofusers_userone: continue elif int(skiplastuser_fifthdegree) == 3: #dont skip last user 2 in fifth degree skiplastusertwo_fifthdegree = False elif int(skiplastuser_fifthdegree) == 4: # skip last user 2 in fifth degree skiplastusertwo_fifthdegree = True elif int(skiplastuser_fifthdegree) == 5: #dont skip last user 3 in fifth degree skiplastuserthree_fifthdegree = False elif int(skiplastuser_fifthdegree) == 6: # skip last user 3 in fifth degree skiplastuserthree_fifthdegree = True #the function " checkifuserfollowedfavoritedmentionedbyprevioususer" takes 5 inputs and has 2 outputs. 
# Per-userone setup for the fifth-degree search: work out how the target user
# is connected to `userone` and when/how often the target mentioned or
# favorited them.
#
# checkifuserfollowedfavoritedmentionedbyprevioususer is called with five arguments:
#   1. the username currently being visited in the loop (here `userone`)
#   2. the previous user in the chain (here the target user, e.g. Mojo Jojo)
#   3. the list of usernames that previous user is following
#   4. the list of usernames that previous user has favorited
#   5. the dict of every username that previous user has interacted with
# and hands back two values:
#   1. the connection type - a list built from 'Follows', 'Favorited',
#      'Mentioned', whichever apply
#   2. a 'Yes' / 'No' value saying whether the previous user follows this one
target_to_userone_link = checkifuserfollowedfavoritedmentionedbyprevioususer(
    userone,
    user_target,
    targetuserfollowinglist,
    targetuserfavoriteslist,
    targetuserinteractions,
)
connectiontypetargetusertouserone, followedby_targetusertouserone = target_to_userone_link

# Most recent mention / favorite of userone by the target user, and how many
# times the target user mentioned userone.
lastmentioned = lastdateprevioususermentioneduser(user_target, userone)
lastfavorited = lastdateprevioususerfavoriteduser(user_target, userone)
times_targetuser_mentioned_userone = counttimesprevioususermentioneduser(user_target, userone)

# A retired helper (checkifusersfollowinglistexistsifnotdownload) used to sit
# here: it downloaded "<user> following.csv" when missing and the loop skipped
# userone if the file still did not exist afterwards.

print(f"Phase 1: Making following list for {userone}")
# Next step: load userone's following CSV (every row after the header).
# Open question from the original author: keep that load as one combined call
# or split it into separate calls?
# Fifth-degree search, hops userone -> usertwo -> userthree -> userfour ->
# userfive. This fragment runs inside the enclosing
# `for userone in targetuserinteractions:` loop and ends inside the innermost
# `if userfive in sourceuserfollowerslist:` body, which continues below it.
#
# NOTE(review): the file's original line breaks and indentation were lost; the
# nesting here is reconstructed from statement order (each `continue` has to
# belong to the loop it can sensibly skip). Spots where more than one nesting
# was possible are marked individually - confirm against a formatted copy.

# Results file for this target/source pair; read for resume and dedup checks.
fifth_degree_csv = 'results/' + 'Fifth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv'

# One combined call returns the seven per-user collections unpacked here.
(useronefollowinglist, useronefavorites, useronementionscount,
 useronementionslist, useroneinteractions, useroneinteractionsandfollowing,
 useronefavorites_ordereddict) = generatefollowingfavoritementionsinteractionslists(userone)

print("Phase 2: Making interactions list for " + str(userone) + ". Counting favorited users.")

# e.g. target = mojojojo, userone = buttercup, usertwo = bubbles
for usertwo in useroneinteractions:
    if usertwo == userone:
        continue

    if os.path.exists(fifth_degree_csv):
        with open(fifth_degree_csv, 'r') as f:
            csv_reader = csv.reader(f, delimiter=',')
            next(csv_reader, None)  # skip the header row
            # Column 12 of the results file is "User 2".
            columnofusers_usertwo = [str(row[12]) for row in csv_reader]
        # Header rows can reappear further down the appended file; drop them.
        # (Built as a new list: the original removed items from the list it
        # was iterating over, which can skip elements.)
        columnofusers_usertwo = [entry for entry in columnofusers_usertwo if entry != "User 2"]

        if skiplastusertwo_fifthdegree == False:
            # Resume mode: keep adding to the last User 2 already recorded.
            skiplastusertwo_fifthdegree = True
            # Make sure the last recorded user appears twice at the tail so
            # that the `[:-1]` slices used below still contain it.
            if str(columnofusers_usertwo[len(columnofusers_usertwo) - 1]) != str(columnofusers_usertwo[len(columnofusers_usertwo) - 2]):
                columnofusers_usertwo.append(str(columnofusers_usertwo[len(columnofusers_usertwo) - 1]))
            # Same effect as the original `for usertwo in ...[:-1]: continue`:
            # rebind usertwo to the final element of the slice, if there is one.
            resume_usertwo = columnofusers_usertwo[:-1]
            if resume_usertwo:
                usertwo = resume_usertwo[-1]
        elif skiplastusertwo_fifthdegree == True:
            # Skip a User 2 that was already recorded for this User 1.
            if userone in columnofusers_userone[:-1] and usertwo in columnofusers_usertwo[:-1]:
                continue

    connectiontypeuseronetousertwo, followedby_useronetousertwo = checkifuserfollowedfavoritedmentionedbyprevioususer(
        usertwo, userone, useronefollowinglist, useronefavorites, useroneinteractions)

    downloadfollowingifnotthere(usertwo)
    if not os.path.exists('data/' + str(usertwo) + " following.csv"):
        print('Did not find ' + str(usertwo) + ' following.csv file in data folder...')
        # NOTE(review): this check may originally have sat one level out
        # (run for every user, not only when the file is missing) - confirm.
        if skipifuserisprivate(usertwo) == True:
            continue

    usertwofollowingcount = getfollowingcount(usertwo)
    # Original note: skip users who follow too many people (about 7,000).
    if skipuseriffollowingtoomanyandmentionslow(userone, usertwo, usertwofollowingcount) == False:
        continue

    (usertwofollowinglist, usertwofavorites, usertwomentionscount,
     usertwomentionslist, usertwointeractions, usertwointeractionsandfollowing,
     usertwofavorites_ordereddict) = generatefollowingfavoritementionsinteractionslists(usertwo)

    # chain so far: mojojojo > buttercup (userone) > bubbles (usertwo) > userthree
    for userthree in usertwointeractions:
        if userthree == userone:
            continue
        if userthree == usertwo:
            continue

        if os.path.exists(fifth_degree_csv):
            with open(fifth_degree_csv, 'r') as f:
                csv_reader = csv.reader(f, delimiter=',')
                next(csv_reader, None)  # skip the header row
                # Column 22 is "User 3". The original read column 12 (User 2)
                # here, so the User 3 resume logic ran on the wrong column.
                columnofusers_userthree = [str(row[22]) for row in csv_reader]
            columnofusers_userthree = [entry for entry in columnofusers_userthree if entry != "User 3"]

            if skiplastuserthree_fifthdegree == False:
                # Resume mode: keep adding to the last User 3 already recorded.
                skiplastuserthree_fifthdegree = True
                # The original compared against and appended to
                # columnofusers_usertwo here (copy-paste slip from the User 2
                # branch); the User 3 list is the one being resumed.
                if str(columnofusers_userthree[len(columnofusers_userthree) - 1]) != str(columnofusers_userthree[len(columnofusers_userthree) - 2]):
                    columnofusers_userthree.append(str(columnofusers_userthree[len(columnofusers_userthree) - 1]))
                resume_userthree = columnofusers_userthree[:-1]
                if resume_userthree:
                    userthree = resume_userthree[-1]
            elif skiplastuserthree_fifthdegree == True:
                # Skip a User 3 already recorded for this User 1 / User 2.
                if (userone in columnofusers_userone[:-1]
                        and usertwo in columnofusers_usertwo[:-1]
                        and userthree in columnofusers_userthree[:-1]):
                    continue

        userthreefollowingcount = getfollowingcount(userthree)
        if userthreefollowingcount is None:
            continue
        if skipuseriffollowingtoomanyandmentionslow(usertwo, userthree, userthreefollowingcount) == False:
            continue

        connectiontypeusertwotouserthree, followedby_usertwotouserthree = checkifuserfollowedfavoritedmentionedbyprevioususer(
            userthree, usertwo, usertwofollowinglist, usertwofavorites, usertwointeractions)
        # NOTE(review): this value is overwritten by the combined call a few
        # lines down; kept in case makefollowinglist has side effects.
        userthreefollowinglist = makefollowinglist(userthree)

        # SKIPPERS / BOUNCERS
        downloadfollowingifnotthere(userthree)
        if not os.path.exists('data/' + str(userthree) + " following.csv"):
            print('Did not find ' + str(userthree) + ' following.csv file in data folder...')
            # NOTE(review): nesting of this check reconstructed - confirm.
            if skipifuserisprivate(userthree) == True:
                continue
        if skipuseriffollowingtoomanyandmentionslow(usertwo, userthree, userthreefollowingcount) == False:
            continue

        (userthreefollowinglist, userthreefavorites, userthreementionscount,
         userthreementionslist, userthreeinteractions, userthreeinteractionsandfollowing,
         userthreefavorites_ordereddict) = generatefollowingfavoritementionsinteractionslists(userthree)

        for userfour in userthreeinteractions:
            if userfour == usertwo:
                continue
            if userfour == userthree:
                continue

            userfourfollowingcount = getfollowingcount(userfour)
            if userfourfollowingcount is None:
                continue
            if skipuseriffollowingtoomanyandmentionslow(userthree, userfour, userfourfollowingcount) == False:
                continue

            connectiontypeuserthreetouserfour, followedby_userthreetouserfour = checkifuserfollowedfavoritedmentionedbyprevioususer(
                userfour, userthree, userthreefollowinglist, userthreefavorites, userthreeinteractions)
            userfourfollowinglist = makefollowinglist(userfour)

            # SKIPPERS / BOUNCERS
            downloadfollowingifnotthere(userfour)
            if not os.path.exists('data/' + str(userfour) + " following.csv"):
                print('Did not find ' + str(userfour) + ' following.csv file in data folder...')
                # NOTE(review): nesting of this check reconstructed - confirm.
                if skipifuserisprivate(userfour) == True:
                    continue
            if skipuseriffollowingtoomanyandmentionslow(userthree, userfour, userfourfollowingcount) == False:
                continue
            if skipif_followinglist_doesntexist(userfour) == True:
                continue
            if userfourfollowinglist is None:
                continue

            for userfive in userfourfollowinglist:
                if userfour == userfive:
                    continue
                # Only users found in the source user's followers list can
                # close the chain.
                if userfive not in sourceuserfollowerslist:
                    continue
                if userfive in sourceuserfollowerslist:
                    # Collect the five user columns of the results file to see
                    # whether this chain's users were already recorded.
                    useroneslotinchain = []
                    usertwoslotinchain = []
                    userthreeslotinchain = []
                    userfourslotinchain = []
                    userfiveslotinchain = []
                    countof5users = 0
                    if os.path.exists(fifth_degree_csv):
                        with open(fifth_degree_csv, 'r') as f:
                            csv_reader = csv.reader(f, delimiter=',')
                            next(csv_reader, None)  # skip the header row
                            for row in csv_reader:
                                # Columns 2/12/22/32/42 hold User 1..User 5.
                                useroneslotinchain.append(row[2])
                                usertwoslotinchain.append(row[12])
                                userthreeslotinchain.append(row[22])
                                userfourslotinchain.append(row[32])
                                userfiveslotinchain.append(row[42])
                        if userone in useroneslotinchain:
                            countof5users = 1
                        if usertwo in usertwoslotinchain:
                            countof5users = countof5users + 1
                        if userthree in userthreeslotinchain:
                            countof5users = countof5users + 1
                        if userfour in userfourslotinchain:
                            # The original assigned to `countof4users` here,
                            # so the total could never reach 5 and recorded
                            # chains were never skipped.
                            countof5users = countof5users + 1
                        if userfive in userfiveslotinchain:
                            countof5users = countof5users + 1
                        # NOTE(review): each user is matched against its whole
                        # column, not against a single row, so a new chain made
                        # of users seen in different rows is also skipped. The
                        # fourth-degree check works the same way; consider a
                        # per-row comparison for both.
                        if countof5users == 5:
                            continue

                    print("Found " + str(userfive) + " in " + str(user_source) + " followers")

                    # Target User -> 1
                    countofuseronementioningtargetuser = counttimesprevioususermentioneduser(userone, user_target)
                    lastmentioned_useronetotargetuser = lastdateprevioususermentioneduser(userone, user_target)
                    targetuser_userone_interaction_score = get_weighted_interactions_score_light(
                        user_target, userone,
                        times_targetuser_mentioned_userone, countofuseronementioningtargetuser,
                        lastmentioned, lastmentioned_useronetotargetuser)

                    # 1 -> 2: times and dates of user 1 mentioning/favoriting user 2
                    countofuseronementioningusertwo = counttimesprevioususermentioneduser(userone, usertwo)
                    lastmentioned_useronetousertwo = lastdateprevioususermentioneduser(userone, usertwo)
                    lastfavorited_useronetousertwo = lastdateprevioususerfavoriteduser(userone, usertwo)
                    # ... and of user 2 mentioning user 1
                    countofusertwomentioninguserone = counttimesprevioususermentioneduser(usertwo, userone)
                    lastmentioned_usertwotouserone = lastdateprevioususermentioneduser(usertwo, userone)
                    # TEMPORARILY RETIRING OLD INTERACTIONS SCORE
#---------------------------------------------------------------- #userone_usertwo_interaction_score = interactionscore(countofuseronementioningusertwo, countofusertwomentioninguserone) #userone_usertwo_interaction_score = get_weighted_interactions_score(userone, usertwo) userone_usertwo_interaction_score = get_weighted_interactions_score_light(userone, usertwo, countofuseronementioningusertwo, countofusertwomentioninguserone, lastmentioned_useronetousertwo, lastmentioned_usertwotouserone) #2 -> 3 countofusertwomentioninguserthree = counttimesprevioususermentioneduser(usertwo, userthree) lastmentioned_usertwotouserthree = lastdateprevioususermentioneduser(usertwo, userthree) lastfavorited_usertwotouserthree = lastdateprevioususerfavoriteduser(usertwo, userthree) #times and dates of when user 3 mentioned user 2 countofuserthreementioningusertwo = counttimesprevioususermentioneduser(userthree, usertwo) lastmentioned_userthreetousertwo = lastdateprevioususermentioneduser(userthree, usertwo) #TEMPORARILY RETIRING OLD INTERACTIONS SCORE #---------------------------------------------------------------- #usertwo_userthree_interaction_score = interactionscore(countofusertwomentioninguserthree, countofuserthreementioningusertwo) #usertwo_userthree_interaction_score = get_weighted_interactions_score(usertwo, userthree) usertwo_userthree_interaction_score = get_weighted_interactions_score_light(usertwo, userthree, countofusertwomentioninguserthree, countofuserthreementioningusertwo, lastmentioned_usertwotouserthree, lastmentioned_userthreetousertwo) #times and dates of when user 3 mentioned user 4 #3 -> 4 countofuserthreementioninguserfour = counttimesprevioususermentioneduser(userthree, userfour) lastmentioned_userthreetouserfour = lastdateprevioususermentioneduser(userthree, userfour) lastfavorited_userthreetouserfour = lastdateprevioususerfavoriteduser(userthree, userfour) #times and dates of when user 4 mentioned user 3 countofuserfourmentioninguserthree = 
counttimesprevioususermentioneduser(userfour, userthree)  # NOTE(review): tail of the statement begun on the preceding line (presumably the assignment to countofuserfourmentioninguserthree) - confirm when re-flowing the file
lastmentioned_userfourtouserthree = lastdateprevioususermentioneduser(userfour, userthree)

# The older interactionscore() / get_weighted_interactions_score() calculations
# are temporarily retired; every link below is scored with
# get_weighted_interactions_score_light(), which takes the two users, the
# mention counts in each direction, and the last-mention dates in each direction.
userthree_userfour_interaction_score = get_weighted_interactions_score_light(
    userthree,
    userfour,
    countofuserthreementioninguserfour,
    countofuserfourmentioninguserthree,
    lastmentioned_userthreetouserfour,
    lastmentioned_userfourtouserthree,
)

# 4 -> 5: mention counts and last-mention dates in both directions.
countofuserfourmentioninguserfive = counttimesprevioususermentioneduser(userfour, userfive)
lastmentioned_userfourtouserfive = lastdateprevioususermentioneduser(userfour, userfive)
countofuserfivementioninguserfour = counttimesprevioususermentioneduser(userfive, userfour)
lastmentioned_userfivetouserfour = lastdateprevioususermentioneduser(userfive, userfour)
userfour_userfive_interaction_score = get_weighted_interactions_score_light(
    userfour,
    userfive,
    countofuserfourmentioninguserfive,
    countofuserfivementioninguserfour,
    lastmentioned_userfourtouserfive,
    lastmentioned_userfivetouserfour,
)

# 5 -> Source User
countofuserfivementioningsourceuser = counttimesprevioususermentioneduser(userfive, user_source)
lastmentioned_userfivetosourceuser = lastdateprevioususermentioneduser(userfive, user_source)
# Times and dates of when the source user mentioned user 5.
countofsourceusermentioninguserfive = counttimesprevioususermentioneduser(user_source, userfive)
lastmentioned_sourceusertouserfive = lastdateprevioususermentioneduser(user_source, userfive)
userfive_sourceuser_interaction_score = get_weighted_interactions_score_light(
    userfive,
    user_source,
    countofuserfivementioningsourceuser,
    countofsourceusermentioninguserfive,
    lastmentioned_userfivetosourceuser,
    lastmentioned_sourceusertouserfive,
)

# Chain scores.
# The right-nested grouping of the products is kept exactly as written so the
# floating-point results do not change.
# Product of all six link scores (target-1, 1-2, 2-3, 3-4, 4-5, 5-source).
chaininteraction_mention_score = (
    float(targetuser_userone_interaction_score)
    * (float(userone_usertwo_interaction_score)
       * (float(usertwo_userthree_interaction_score)
          * (float(userthree_userfour_interaction_score)
             * (float(userfour_userfive_interaction_score)
                * (float(userfive_sourceuser_interaction_score))))))
)
# Sum of the same six link scores.
chaininteraction_mention_score_added = (
    float(targetuser_userone_interaction_score)
    + float(userone_usertwo_interaction_score)
    + float(usertwo_userthree_interaction_score)
    + float(userthree_userfour_interaction_score)
    + float(userfour_userfive_interaction_score)
    + float(userfive_sourceuser_interaction_score)
)
# Product of the first five link scores (the 5-source link is left out).
chaininteraction_mention_score_without_sourceuser = (
    float(targetuser_userone_interaction_score)
    * (float(userone_usertwo_interaction_score)
       * (float(usertwo_userthree_interaction_score)
          * (float(userthree_userfour_interaction_score)
             * (float(userfour_userfive_interaction_score)))))
)

# Reinforcement percentage multiplied by recency score, for the links
# target-1, 1-2, 2-3 and 3-4.
reinforcementpercentxrecency_targetuser_userone = getreinforcementpercentxrecency(
    user_target, userone, getreinforcementpercentage(userone, targetuserfavorites_ordereddict))
reinforcementpercentxrecency_userone_usertwo = getreinforcementpercentxrecency(
    userone, usertwo, getreinforcementpercentage(usertwo, useronefavorites_ordereddict))
reinforcementpercentxrecency_usertwo_userthree = getreinforcementpercentxrecency(
    usertwo, userthree, getreinforcementpercentage(userthree, usertwofavorites_ordereddict))
reinforcementpercentxrecency_userthree_userfour = getreinforcementpercentxrecency(
    userthree, userfour, getreinforcementpercentage(userfour, userthreefavorites_ordereddict))

# Reinforcement chain scores: reinforcement values for the first four links,
# interaction scores for the remaining link(s).
RRRRImultiplicativechainscore_without_sourceuser = (
    float(reinforcementpercentxrecency_targetuser_userone)
    * (float(reinforcementpercentxrecency_userone_usertwo)
       * (float(reinforcementpercentxrecency_usertwo_userthree)
          * (float(reinforcementpercentxrecency_userthree_userfour)
             * float(userfour_userfive_interaction_score))))
)
RRRRIImultiplicativechainscore = (
    float(reinforcementpercentxrecency_targetuser_userone)
    * (float(reinforcementpercentxrecency_userone_usertwo)
       * (float(reinforcementpercentxrecency_usertwo_userthree)
          * (float(reinforcementpercentxrecency_userthree_userfour))
          * (float(userfour_userfive_interaction_score)
             * (float(userfive_sourceuser_interaction_score)))))
)

# Reinforcement value of the target-1 link only; every later link uses its
# interaction score.
R4Imultiplicativechainscore_without_sourceuser = (
    float(reinforcementpercentxrecency_targetuser_userone)
    * (float(userone_usertwo_interaction_score)
       * (float(usertwo_userthree_interaction_score)
          * (float(userthree_userfour_interaction_score)
             * (float(userfour_userfive_interaction_score)))))
)
R5Imultiplicativechainscore = (
    float(reinforcementpercentxrecency_targetuser_userone)
    * (float(userone_usertwo_interaction_score)
       * (float(usertwo_userthree_interaction_score)
          * (float(userthree_userfour_interaction_score)
             * (float(userfour_userfive_interaction_score)
                * (float(userfive_sourceuser_interaction_score))))))
)

# Append one result row (82 columns) to the fifth-degree results file. The
# header row is written only while header_added5 is falsy.
# newline='' is what the csv module requires for files handed to csv.writer;
# without it rows end in "\r\r\n" on Windows. The file handle gets its own name
# instead of being shadowed by the writer object.
with open('results/' + 'Fifth Degree for ' + str(user_target) + ' and ' + str(user_source) + '.csv', mode='a', newline='') as results_file:
    csv_writer = csv.writer(results_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    if not header_added5:
        csv_writer.writerow([
            # Target user -> User 1
            'Target Username',
            '>',
            'User 1',
            'Times favorited by ' + str(user_target),
            'Last recorded favorite by ' + str(user_target),
            'Followed by ' + str(user_target) + '?',
            'Times mentioned by ' + str(user_target),
            'Last recorded mention by ' + str(user_target),
            'Reinforcement Rank',
            'Reinforcement Rank Out of Number of Interacted Users',
            'Reinforcement Percentage',
            # User 1 -> User 2
            '>',
            'User 2',
            'Times favorited by User 1',
            'Last recorded favorite by User 1',
            'Followed by User 1?',
            'Times mentioned by User 1',
            'Last mentioned by User 1',
            'Reinforcement Rank',
            'Reinforcement Rank Out of Number of Interacted Users',
            'Reinforcement Percentage',
            # User 2 -> User 3
            '>',
            'User 3',
            'Times favorited by User 2',
            'Last recorded favorite by User 2',
            'Followed by User 2?',
            'Times mentioned by User 2',
            'Last mentioned by User 2',
            'Reinforcement Rank',
            'Reinforcement Rank Out of Number of Interacted Users',
            'Reinforcement Percentage',
            # User 3 -> User 4
            '>',
            'User 4',
            'Times favorited by User 3',
            'Last recorded favorite by User 3',
            'Followed by User 3?',
            'Times mentioned by User 3',
            'Last mentioned by User 3',
            'Reinforcement Rank',
            'Reinforcement Rank Out of Number of Interacted Users',
            'Reinforcement Percentage',
            # User 4 -> User 5 -> Source user
            '>',
            'User 5',
            '>',
            'Source User',
            # Target user <-> User 1 mentions
            'Times ' + str(user_target) + ' mentioned User 1',
            'Last recorded mention of User 1 by ' + str(user_target),
            'Times User 1 mentioned ' + str(user_target),
            'Last recorded mention of ' + str(user_target) + ' by User 1',
            'Interaction score for ' + str(user_target) + ' and User 1',
            # User 1 <-> User 2 mentions
            'Times User 1 mentioned User 2',
            'Last recorded mention of User 2 by User 1',
            'Times User 2 mentioned User 1',
            'Last recorded mention of User 1 by User 2',
            'Interaction Score for User 1 and User 2',
            # User 2 <-> User 3 mentions
            'Times User 2 mentioned User 3',
            'Last recorded mention of User 3 by User 2',
            'Times User 3 mentioned User 2',
            'Last recorded mention of User 2 by User 3',
            'Interaction Score for User 2 and User 3',
            # User 3 <-> User 4 mentions. The two "Last recorded mention"
            # labels were swapped relative to the values written beneath them.
            'Times User 3 mentioned User 4',
            'Last recorded mention of User 4 by User 3',
            'Times User 4 mentioned User 3',
            'Last recorded mention of User 3 by User 4',
            'Interaction Score for User 3 and User 4',
            # User 4 <-> User 5 mentions (same label swap corrected).
            'Times User 4 mentioned User 5',
            'Last recorded mention of User 5 by User 4',
            'Times User 5 mentioned User 4',
            'Last recorded mention of User 4 by User 5',
            'Interaction Score for User 4 and User 5',
            # User 5 <-> Source user mentions
            'Times ' + str(user_source) + ' was mentioned by User 5',
            'Last recorded mention of ' + str(user_source) + ' by User 5',
            'Times ' + str(user_source) + ' mentioned User 5',
            'Last recorded mention of User 5 by ' + str(user_source),
            'Interaction Score for User 5 and ' + str(user_source),
            # Chain scores
            'Chain Interaction Score',
            'Chain Interaction Score (Without User 5 and ' + str(user_source) + ')',
            'Additive Chain Interaction Score',
            'RRRRI Multiplicative Chain Score (without ' + str(user_source) + ')',
            'RRRRII Multiplicative Chain Score',
            'R-4I Multiplicative Chain Score (without ' + str(user_source) + ')',
            'R-5I Multiplicative Chain Score',
        ])
        header_added5 = True

    # NOTE(review): the "Reinforcement Rank" cells are the 0-based position of
    # the user among the keys of the corresponding interactions mapping, so the
    # first entry is written as 0 (and "0/N") - confirm that this is intended.
    csv_writer.writerow([
        # Target user -> User 1
        user_target,
        str(connectiontypetargetusertouserone).replace("[","").replace("]","").replace("'",""),
        str(userone),
        str(targetuserinteractions.get(userone)),
        lastfavorited,
        followedby_targetusertouserone,
        times_targetuser_mentioned_userone,
        lastmentioned,
        str(list(targetuserinteractions.keys()).index(userone)),
        str(list(targetuserinteractions.keys()).index(userone)) + '/' + str(len(list(targetuserinteractions.keys()))),
        getreinforcementpercentage(userone, targetuserfavorites_ordereddict),
        # User 1 -> User 2
        str(connectiontypeuseronetousertwo).replace("[","").replace("]","").replace("'",""),
        str(usertwo),
        str(useroneinteractions.get(usertwo)),
        lastfavorited_useronetousertwo,
        followedby_useronetousertwo,
        str(countofuseronementioningusertwo),
        lastmentioned_useronetousertwo,
        str(list(useroneinteractions.keys()).index(usertwo)),
        str(list(useroneinteractions.keys()).index(usertwo)) + '/' + str(len(list(useroneinteractions.keys()))),
        getreinforcementpercentage(usertwo, useronefavorites_ordereddict),
        # User 2 -> User 3
        str(connectiontypeusertwotouserthree).replace("[","").replace("]","").replace("'",""),
        str(userthree),
        str(usertwointeractions.get(userthree)),
        lastfavorited_usertwotouserthree,
        followedby_usertwotouserthree,
        str(countofusertwomentioninguserthree),
        lastmentioned_usertwotouserthree,
        str(list(usertwointeractions.keys()).index(userthree)),
        str(list(usertwointeractions.keys()).index(userthree)) + '/' + str(len(list(usertwointeractions.keys()))),
        getreinforcementpercentage(userthree, usertwofavorites_ordereddict),
        # User 3 -> User 4
        str(connectiontypeuserthreetouserfour).replace("[","").replace("]","").replace("'",""),
        str(userfour),
        str(userthreeinteractions.get(userfour)),
        lastfavorited_userthreetouserfour,
        followedby_userthreetouserfour,
        str(countofuserthreementioninguserfour),
        lastmentioned_userthreetouserfour,
        str(list(userthreeinteractions.keys()).index(userfour)),
        str(list(userthreeinteractions.keys()).index(userfour)) + '/' + str(len(list(userthreeinteractions.keys()))),
        getreinforcementpercentage(userfour, userthreefavorites_ordereddict),
        # User 4 -> User 5 -> Source user
        'Follows',
        str(userfive),
        'Follows',
        str(user_source),
        # Target user <-> User 1 mentions
        times_targetuser_mentioned_userone,
        lastmentioned,
        countofuseronementioningtargetuser,
        lastmentioned_useronetotargetuser,
        format(targetuser_userone_interaction_score, 'f'),
        # User 1 <-> User 2 mentions
        str(countofuseronementioningusertwo),
        lastmentioned_useronetousertwo,
        str(countofusertwomentioninguserone),
        lastmentioned_usertwotouserone,
        str(format(userone_usertwo_interaction_score, 'f')),
        # User 2 <-> User 3 mentions
        str(countofusertwomentioninguserthree),
        lastmentioned_usertwotouserthree,
        str(countofuserthreementioningusertwo),
        lastmentioned_userthreetousertwo,
        format(usertwo_userthree_interaction_score, 'f'),
        # User 3 <-> User 4 mentions
        str(countofuserthreementioninguserfour),
        lastmentioned_userthreetouserfour,
        str(countofuserfourmentioninguserthree),
        lastmentioned_userfourtouserthree,
        str(format(userthree_userfour_interaction_score, 'f')),
        # User 4 <-> User 5 mentions
        str(countofuserfourmentioninguserfive),
        lastmentioned_userfourtouserfive,
        str(countofuserfivementioninguserfour),
        lastmentioned_userfivetouserfour,
        format(userfour_userfive_interaction_score, 'f'),
        # User 5 <-> Source user mentions
        str(countofuserfivementioningsourceuser),
        lastmentioned_userfivetosourceuser,
        str(countofsourceusermentioninguserfive),
        lastmentioned_sourceusertouserfive,
        format(userfive_sourceuser_interaction_score, 'f'),
        # Chain scores (same order as the header)
        chaininteraction_mention_score,
        chaininteraction_mention_score_without_sourceuser,
        chaininteraction_mention_score_added,
        RRRRImultiplicativechainscore_without_sourceuser,
        RRRRIImultiplicativechainscore,
        R4Imultiplicativechainscore_without_sourceuser,
        R5Imultiplicativechainscore,
    ])