# es/weather
							# This script is largely vibecoded, changed by me, reviewed, and tested on my machine.
# ####################################################################################
# Reads sensor text files from a top-level folder ("temperature") and from historical subfolders ("2024", "2023"). Filenames are Unix timestamps like 1700000000.txt.
# Each file is parsed for lines containing "Temperature", "Pressure", and "Humidity" and the numeric values after the colon. Only files that contain all three values are kept.
# Loads recent data from the top-level folder for the last 24 hours (since yesterday). Exits if no recent data found.
# Loads historical data from the specified subfolders, deduplicates identical timestamps by averaging, and builds a dense 1-minute interpolated historical timeseries when historical data exists.
# Resamples the recent (last-24h) data to 5-minute intervals.
# For each timestamp in the recent series, it attempts to build two historical series:
#    - 1 year back and 2 years back (same calendar time shifted by 1 and 2 years).
#    - For each target historical time it first tries an exact match in historical data; if none, it looks for the nearest timestamp within ±1 day; if still none, it attempts time interpolation from the dense 1-minute historical series. If all fail, it produces NaN.
#
# For each of Temperature, Pressure, and Humidity:
#    - Aligns recent data to today's date (time-of-day) and plots it (blue) for the last 24 hours resampled at 5 minutes.
#    - Plots the 1-year-back (orange) and 2-year-back (green) series aligned to the same time-of-day axis, if available.
#    - X-axis covers 00:00–23:59 of today with hour ticks every 2 hours.
#    - Adds a small stats table below the plot showing Min, Max, Avg for each available year (current, 1y, 2y).
#    - Saves each plot as Temperature.png, Pressure.png, Humidity.png and prints the saved filename.
#
# The current year is located in the folder temperature
# all previous years are moved to folders temperature/YYYY
# Every 5 minutes a new <unixtimestamp>.txt appears in the folder temperature (it is written by another script).
# This script runs at 23:56, after the last 5-minute slot of the day, so by then all of the day's data
# points should have been collected.
#
# Because I developed all this stuff over a long period of time, the data is stored in text files with the following content
# Temperature :  10.28 °C
# Pressure    :  979.6165 hPa
# Humidity    :  100 %
# 
# So I had to deal with this now, knowing that it would be much easier to store all the data points in some database or even structured text files.
# On the other hand, I could rewrite a lot of things, but that would risk breaking more than I want to.
#

import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import matplotlib.dates as mdates

# Config: input locations, reference times and shared plot styling
data_folder = 'temperature'
# Subfolders of data_folder that contain the historical files
historical_folders = ['2024', '2023']
now = datetime.now()
yesterday = now - timedelta(hours=24)
# A nearest-timestamp match is accepted when it lies within ±1 day
timestamp_tolerance = pd.Timedelta('1D')
# Line colour per plotted series
colors = dict([
    ('current', 'tab:blue'),
    ('1y', 'tab:orange'),
    ('2y', 'tab:green'),
])
marker, linewidth, markersize = 'o', 2, 4

def _parse_reading(line):
    """Return the number that follows the first ':' in ``line``, or None.

    The value is the first whitespace-separated token after the colon, e.g.
    ``"Temperature :  10.28 °C"`` yields ``10.28``. None is returned when the
    line has no colon, nothing after it, or a token that is not a float.
    """
    try:
        return float(line.split(':')[1].split()[0])
    except (IndexError, ValueError):
        return None


def read_folder(folder_path, since=None):
    """Read all ``<unix timestamp>.txt`` sensor files of one folder.

    Args:
        folder_path: Directory to scan (not recursive).
        since: Optional naive ``datetime``; files whose timestamp (converted
            with ``datetime.fromtimestamp``, i.e. local time) is earlier are
            skipped.

    Returns:
        A DataFrame indexed by 'Timestamp' and sorted by it, with the float
        columns 'Temperature', 'Pressure' and 'Humidity'. Only files that
        provide all three values contribute a row. The frame is empty (same
        columns) when the folder is missing or holds no usable file.
    """
    rows = []
    if os.path.isdir(folder_path):
        for fname in os.listdir(folder_path):
            if not fname.endswith('.txt'):
                continue
            try:
                dt = datetime.fromtimestamp(int(fname[:-4]))
            except (ValueError, OverflowError, OSError):
                # Name is not an integer, or not a representable timestamp.
                continue
            if since is not None and dt < since:
                continue
            t_val = p_val = h_val = None
            # The numeric part is plain ASCII; decoding leniently keeps a
            # differently encoded "°" from aborting the whole run.
            with open(os.path.join(folder_path, fname), 'r',
                      encoding='utf-8', errors='replace') as f:
                for line in f:
                    if 'Temperature' in line:
                        t_val = _parse_reading(line)
                    elif 'Pressure' in line:
                        p_val = _parse_reading(line)
                    elif 'Humidity' in line:
                        h_val = _parse_reading(line)
            if t_val is not None and p_val is not None and h_val is not None:
                rows.append((dt, t_val, p_val, h_val))

    columns = ['Timestamp', 'Temperature', 'Pressure', 'Humidity']
    if not rows:
        return pd.DataFrame(columns=columns).set_index('Timestamp')
    df = pd.DataFrame(rows, columns=columns).set_index('Timestamp')
    return df.sort_index()

# Load recent data (last 24h) from top-level folder
recent_df = read_folder(data_folder, since=yesterday)
if recent_df.empty:
    print("No data found for the last 24 hours in the top-level folder.")
    # SystemExit rather than exit(): exit() is an interactive helper injected
    # by the site module and is not guaranteed to exist in every run mode.
    raise SystemExit(0)

# Load historical data from subfolders. Folders that are missing or hold no
# usable file are left out so that pd.concat only ever sees non-empty frames.
hist_frames = []
for sub in historical_folders:
    path = os.path.join(data_folder, sub)
    frame = read_folder(path)
    if not frame.empty:
        hist_frames.append(frame)
historical_df = pd.concat(hist_frames).sort_index() if hist_frames else pd.DataFrame(columns=recent_df.columns)
if historical_df.empty:
    print("Warning: no historical data found in subfolders:", historical_folders)

# Deduplicate historical timestamps by averaging duplicates
if not historical_df.empty:
    historical_df = historical_df.groupby(historical_df.index).mean()

# Resample recent to 5-minute intervals ('min' replaces the deprecated 'T' alias)
recent_resampled = recent_df.resample('5min').mean()

# Prepare dense historical series (1-minute resolution) for interpolation if we have historical data
if not historical_df.empty:
    # Start the grid on a whole minute so that lookups at whole-minute
    # timestamps (such as the 5-minute resample bins) hit a grid point directly.
    hist_min = historical_df.index.min().floor('min')
    hist_max = historical_df.index.max()
    dense_index = pd.date_range(start=hist_min, end=hist_max, freq='1min')
    # Interpolate on the union of raw and grid timestamps, then keep the grid only.
    historical_dense = historical_df.reindex(historical_df.index.union(dense_index)).sort_index().interpolate(method='time')
    historical_dense = historical_dense.reindex(dense_index)
else:
    historical_dense = pd.DataFrame(columns=recent_resampled.columns)

def build_historical_series_with_interpolation(main_index, column, years_back):
    """Look up ``column`` at the times of ``main_index`` shifted back in time.

    For every timestamp of ``main_index`` the target is the same calendar time
    ``years_back`` years earlier. Each target is resolved against the
    module-level historical data in this order:

    1. the nearest timestamp in ``historical_df`` (an exact hit is simply the
       nearest with distance zero), accepted when it lies within
       ``timestamp_tolerance``;
    2. otherwise a value from ``historical_dense`` when the target lies inside
       its time range, interpolating in time if the target is not a grid point;
    3. otherwise NaN.

    Args:
        main_index: DatetimeIndex of the current series.
        column: Name of the column to read from the historical frames.
        years_back: Number of years to shift back.

    Returns:
        A Series with one value per entry of ``main_index``, in the same
        order, indexed by the target's time of day placed on today's date
        (``now.date()``).
    """
    # Target times: same calendar time years_back earlier
    target_times = main_index - pd.DateOffset(years=years_back)

    # Time of day of every target, moved onto today's date for plotting
    today = now.date()
    aligned_index = pd.to_datetime([datetime.combine(today, t.time()) for t in target_times])

    nan = float('nan')
    if historical_df.empty:
        return pd.Series(data=[nan] * len(target_times), index=aligned_index)

    # One vectorized nearest-neighbour lookup; -1 marks targets without a
    # historical timestamp inside the tolerance window.
    try:
        positions = historical_df.index.get_indexer(
            target_times, method='nearest', tolerance=timestamp_tolerance)
    except (ValueError, TypeError, pd.errors.InvalidIndexError) as exc:
        # Raised e.g. for an unsorted or non-unique index; report it instead
        # of hiding it and let every target fall through to interpolation.
        print("Warning: nearest-timestamp lookup failed, using interpolation only:", exc)
        positions = [-1] * len(target_times)

    hist_values = historical_df[column].to_numpy()

    # Bounds of the dense series are loop-invariant
    have_dense = not historical_dense.empty
    if have_dense:
        dense_col = historical_dense[column]
        dense_start = dense_col.index.min()
        dense_end = dense_col.index.max()

    values = []
    for t, pos in zip(target_times, positions):
        if pos != -1:
            values.append(hist_values[pos])
            continue

        # interpolation from dense historical series
        if have_dense and dense_start <= t <= dense_end:
            if t in dense_col.index:
                values.append(dense_col.at[t])
            else:
                # Target falls between grid points: insert it and interpolate
                widened = dense_col.reindex(dense_col.index.union([t])).sort_index().interpolate(method='time')
                values.append(widened.at[t])
            continue

        values.append(nan)

    return pd.Series(data=values, index=aligned_index)

def build_current_series_timeofday(df):
    """Return a copy of ``df`` whose index is moved onto today's date.

    Every index timestamp keeps its time of day but gets the date of the
    module-level ``now``. Values, columns and row order are unchanged; the
    input frame is not modified. An empty frame stays empty and keeps its
    columns.

    Args:
        df: DataFrame with a datetime-like index.

    Returns:
        A new DataFrame with the re-dated (unnamed) DatetimeIndex.
    """
    today = now.date()
    aligned = df.copy()
    # Rebuild only the index instead of re-assembling the frame row by row
    aligned.index = pd.to_datetime([datetime.combine(today, t.time()) for t in df.index])
    return aligned

def year_label_for_timestamp(ts):
    """Return the ``year`` attribute of the timestamp ``ts``."""
    year = ts.year
    return year

def plot_with_history(df_current, column, ylabel, out_prefix):
    """Plot one quantity for today together with the series 1 and 2 years back.

    The current data and both historical series are drawn against a shared
    time-of-day axis (today's date, 00:00 to 23:59, a tick every 2 hours). A
    Min/Max/Avg table with one row per year is placed below the axes. The
    figure is written to ``<out_prefix>.png`` and the file name is printed.

    Args:
        df_current: DataFrame with a DatetimeIndex that contains ``column``.
        column: Name of the column to plot.
        ylabel: Y-axis label, also used as the first part of the title.
        out_prefix: Output file name without the ``.png`` extension.
    """
    main_series = build_current_series_timeofday(df_current[[column]])[column]

    # Years shown in the legend and in the stats table
    year_now = now.year
    year_1, year_2 = ((now - pd.DateOffset(years=back)).year for back in (1, 2))

    fig, ax = plt.subplots(figsize=(12, 6))
    line_style = dict(marker=marker, markersize=markersize, linewidth=linewidth)

    # Current year first, so it is the first legend entry
    ax.plot(main_series.index, main_series.values,
            color=colors['current'], label=str(year_now), **line_style)

    # Historical series, already aligned to the time-of-day axis
    history = [
        ('1y', year_1, build_historical_series_with_interpolation(df_current.index, column, 1)),
        ('2y', year_2, build_historical_series_with_interpolation(df_current.index, column, 2)),
    ]
    for key, year, past in history:
        if past is None or past.dropna().empty:
            continue  # nothing to draw for this year
        ax.plot(past.index, past.values,
                color=colors[key], label=str(year), **line_style)

    # X-axis: the whole of today, hour ticks every 2 hours, horizontal labels, no axis label
    today = now.date()
    ax.set_xlim(datetime.combine(today, datetime.min.time()),
                datetime.combine(today, datetime.max.time()))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(0)
        tick_label.set_horizontalalignment('center')
    ax.set_xlabel('')

    ax.set_title(f"{ylabel} - {now.strftime('%d.%m.%Y')}")
    ax.set_ylabel(ylabel)
    ax.grid()
    ax.legend(title='Year')

    def series_stats(s):
        """Return (min, max, mean) as 2-decimal strings, or dashes without data."""
        has_data = s is not None and not s.dropna().empty
        if not has_data:
            return ('-', '-', '-')
        return tuple(f"{stat:.2f}" for stat in (s.min(), s.max(), s.mean()))

    # Stats table: one row per year, columns Min / Max / Avg
    table_rows = [(str(year_now), main_series)]
    table_rows.extend((str(year), past) for _, year, past in history if past is not None)
    table_years = [label for label, _ in table_rows]
    table_data = [series_stats(s) for _, s in table_rows]

    # The bbox spans the full axes width and sits below the plot area
    the_table = ax.table(cellText=table_data,
                         rowLabels=table_years,
                         colLabels=['Min', 'Max', 'Avg'],
                         cellLoc='center',
                         rowLoc='center',
                         colLoc='center',
                         loc='bottom',
                         bbox=[0.0, -0.35, 1.0, 0.25])  # left, bottom, width, height
    the_table.auto_set_font_size(False)
    the_table.set_fontsize(10)

    fig.subplots_adjust(bottom=0.28)  # make room for the table
    outname = f"{out_prefix}.png"
    fig.savefig(outname, bbox_inches='tight')
    plt.close(fig)
    print("Saved", outname)

# Generate one plot per measured quantity; the column name doubles as the
# output file prefix.
for column, ylabel in (
    ('Temperature', 'Temperature (°C)'),
    ('Pressure', 'Pressure (hPa)'),
    ('Humidity', 'Humidity (%)'),
):
    plot_with_history(recent_resampled, column, ylabel, column)