Twitter-Mar-Menor / pseudo_week_transformation.py
pseudo_week_transformation.py
Raw
def pseudo_week_transformation(data,output_file,date_format="%Y-%m-%d"):
    
    #Example values for date_format: "%Y-%m-%d","%d-%m-%Y","%Y/%m/%d","%d/%m/%Y"
    
    #month_location is left or right, depending on its position with respect to the day
    from datetime import datetime

    date_pseudo_weeks = [0] * len(data)
    year = [0] * len(data)
    month = [0] * len(data)
    pseudo_week = [0] * len(data)
    time_index = [0] * len(data)

    data.date = data.date.apply(lambda x: str(datetime.fromtimestamp(datetime.strptime(x, date_format).timestamp()).strftime("%Y-%m-%d")))
    #Only works with dates from year 1970

    for i in range(0, len(data)):
        year[i] = int(str(data['date'].iloc[i])[0:4])
        time_index[i] = i
        month[i] = int(str(data['date'].iloc[i])[5:7])
        if 1 <= int(str(data['date'].iloc[i])[8:10]) <= 7:
            pseudo_week[i] = 1
        elif 8 <= int(str(data['date'].iloc[i])[8:10]) <= 14:
            pseudo_week[i] = 2
        elif 15 <= int(str(data['date'].iloc[i])[8:10]) <= 21:
            pseudo_week[i] = 3
        elif 22 <= int(str(data['date'].iloc[i])[8:10]) <= 31:
            pseudo_week[i] = 4
        #date_pseudo_weeks[i] = str(pseudo_week[i]) + "/{:02d}".format(month[i]) + "/{:02d}".format(year[i])
        date_pseudo_weeks[i] = "{:02d}-".format(year[i]) + "{:02d}-".format(month[i]) + "{:02d}".format(pseudo_week[i]*7)
        

    data['date'] = date_pseudo_weeks
    
    #Choose here between sum or mean
    data = data.groupby(by=["date"], sort=False).sum()

    data.to_excel(output_file, index=True)