# This script is largely vibecoded, changed by me, reviewed, and tested on my machine.
#
####################################################################################
# Reads sensor text files from a top-level folder ("temperature") and from historical subfolders ("2024", "2023"). Filenames are Unix timestamps like 1700000000.txt.
# Each file is parsed for lines containing "Temperature", "Pressure", and "Humidity" and the numeric values after the colon. Only files that contain all three values are kept.
# Loads recent data from the top-level folder for the last 24 hours (since yesterday). Exits if no recent data found.
# Loads historical data from the specified subfolders, deduplicates identical timestamps by averaging, and builds a dense 1-minute interpolated historical timeseries when historical data exists.
# Resamples the recent (last-24h) data to 5-minute intervals.
# For each timestamp in the recent series, it attempts to build two historical series:
# - 1 year back and 2 years back (same calendar time shifted by 1 and 2 years).
# - For each target historical time it first tries an exact match in historical data; if none, it looks for the nearest timestamp within ±1 day; if still none, it attempts time interpolation from the dense 1-minute historical series. If all fail, it produces NaN.
#
# For each of Temperature, Pressure, and Humidity:
# - Aligns recent data to today's date (time-of-day) and plots it (blue) for the last 24 hours resampled at 5 minutes.
# - Plots the 1-year-back (orange) and 2-year-back (green) series aligned to the same time-of-day axis, if available.
# - X-axis covers 00:00–23:59 of today with hour ticks every 2 hours.
# - Adds a small stats table below the plot showing Min, Max, Avg for each available year (current, 1y, 2y).
# - Saves each plot as Temperature.png, Pressure.png, Humidity.png and prints the saved filename.
#
# The current year is located in the folder temperature;
# all previous years are moved to folders temperature/YYYY.
# Every 5th minute a new unixtimestamp.txt magically (it's another script) appears in the folder temperature.
# This script runs at 23:56, when the last 5th minute of the day is over and all data points
# of the day should have been collected.
#
# Because I developed all this stuff over a long period of time, the data is stored in text files with the following content
# Temperature : 10.28 °C
# Pressure : 979.6165 hPa
# Humidity : 100 %
#
# So I had to deal with this now, knowing that it would be much easier to store all the data points in some database or even structured text files.
# On the other hand I could rewrite a lot of things, risking to break more things than I want to.
#
import os
import sys
from datetime import datetime, timedelta

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Config
data_folder = 'temperature'
# NOTE(review): these folder names are hard-coded, while the plot labels further down
# are derived from now.year - 1 and now.year - 2 — keep both in sync when the year rolls over.
historical_folders = ['2024', '2023']  # folders containing historical files
now = datetime.now()
yesterday = now - timedelta(days=1)
timestamp_tolerance = pd.Timedelta(days=1)  # nearest match allowed within ±1 day
colors = {
    'current': 'tab:blue',
    '1y': 'tab:orange',
    '2y': 'tab:green'
}
marker = 'o'
linewidth = 2
markersize = 4


def _parse_value(line):
    """Return the number that follows the first colon in *line*, or None if it cannot be parsed."""
    try:
        return float(line.split(':')[1].strip().split(' ')[0])
    except (IndexError, ValueError):
        # No colon on the line, or the token after it is not a number.
        return None


def read_folder(folder_path, since=None):
    """Read all '<unix timestamp>.txt' sensor files of one folder into a DataFrame.

    Args:
        folder_path: Directory to scan (not recursive).
        since: Optional datetime; files whose timestamp is older are skipped.

    Returns:
        DataFrame indexed by 'Timestamp' (sorted ascending) with the columns
        'Temperature', 'Pressure' and 'Humidity'. Only files that yield all
        three values are included. An empty frame is returned when the folder
        does not exist or contains no usable file.
    """
    if not os.path.isdir(folder_path):
        return pd.DataFrame(columns=['Timestamp', 'Temperature', 'Pressure', 'Humidity']).set_index('Timestamp')

    rows = []
    for fname in os.listdir(folder_path):
        if not fname.endswith('.txt'):
            continue
        try:
            dt = datetime.fromtimestamp(int(fname[:-4]))
        except (ValueError, OverflowError, OSError):
            # Not a numeric name, or a number that is not a representable timestamp.
            continue
        if since is not None and dt < since:
            continue

        readings = {'Temperature': None, 'Pressure': None, 'Humidity': None}
        # The files contain a non-ASCII '°'. Only the ASCII numbers are parsed, so
        # undecodable bytes are replaced instead of aborting the whole run.
        with open(os.path.join(folder_path, fname), 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # First matching keyword wins, in the order Temperature, Pressure, Humidity.
                for key in readings:
                    if key in line:
                        readings[key] = _parse_value(line)
                        break

        if all(value is not None for value in readings.values()):
            rows.append((dt, readings['Temperature'], readings['Pressure'], readings['Humidity']))

    df = pd.DataFrame(rows, columns=['Timestamp', 'Temperature', 'Pressure', 'Humidity'])
    if df.empty:
        return df.set_index('Timestamp')
    df.set_index('Timestamp', inplace=True)
    df.sort_index(inplace=True)
    return df


# Load recent data (last 24h) from top-level folder
recent_df = read_folder(data_folder, since=yesterday)
if recent_df.empty:
    print("No data found for the last 24 hours in the top-level folder.")
    sys.exit()

# Load historical data from subfolders.
# Empty frames (missing or empty folders) are left out: they carry object-dtype
# columns and index, which should not take part in the concatenation.
hist_frames = []
for sub in historical_folders:
    frame = read_folder(os.path.join(data_folder, sub))
    if not frame.empty:
        hist_frames.append(frame)
historical_df = pd.concat(hist_frames).sort_index() if hist_frames else pd.DataFrame(columns=recent_df.columns)
if historical_df.empty:
    print("Warning: no historical data found in subfolders:", historical_folders)

# Deduplicate historical timestamps by averaging duplicates
if not historical_df.empty:
    historical_df = historical_df.groupby(historical_df.index).mean()

# Resample recent to 5-minute intervals ('min' is the non-deprecated spelling of 'T')
recent_resampled = recent_df.resample('5min').mean()

# Prepare dense historical series (1-minute resolution) for interpolation if we have historical data
if not historical_df.empty:
    hist_min = historical_df.index.min()
    hist_max = historical_df.index.max()
    dense_index = pd.date_range(start=hist_min, end=hist_max, freq='1min')
    historical_dense = historical_df.reindex(historical_df.index.union(dense_index)).sort_index().interpolate(method='time')
    historical_dense = historical_dense.reindex(dense_index)
else:
    historical_dense = pd.DataFrame(columns=recent_resampled.columns)


def build_historical_series_with_interpolation(main_index, column, years_back):
    """Look up *column* at the timestamps of *main_index* shifted *years_back* years into the past.

    For every shifted timestamp the value is taken from, in this order:
      1. an exact match in ``historical_df``;
      2. the nearest entry of ``historical_df`` if it lies within ``timestamp_tolerance``;
      3. the dense 1-minute series ``historical_dense`` (time-interpolated when the
         timestamp is not one of its index points);
      4. NaN.

    Args:
        main_index: DatetimeIndex of the recent series.
        column: Name of the column to read.
        years_back: Number of calendar years to shift back.

    Returns:
        Series of the values found, indexed by the time-of-day of each shifted
        timestamp combined with today's date, sorted by that index.
    """
    # Target times: same calendar time years_back earlier
    target_times = main_index - pd.DateOffset(years=years_back)
    # Re-date every target to today so all years share one time-of-day axis.
    aligned_index = pd.to_datetime([datetime.combine(now.date(), t.time()) for t in target_times])

    if historical_df.empty:
        return pd.Series(data=[float('nan')] * len(target_times), index=aligned_index, dtype=float).sort_index(kind='stable')

    # Loop-invariant bounds of the dense series.
    dense_available = not historical_dense.empty
    if dense_available:
        dense_start = historical_dense.index.min()
        dense_end = historical_dense.index.max()

    values = []
    for t in target_times:
        # exact
        if t in historical_df.index:
            values.append(historical_df.loc[t, column])
            continue

        # nearest within tolerance
        try:
            pos = historical_df.index.get_indexer([t], method='nearest')[0]
            nearest_ts = historical_df.index[pos]
            if abs(nearest_ts - t) <= timestamp_tolerance:
                values.append(historical_df.iloc[pos][column])
                continue
        except Exception:
            # Deliberate best effort: any lookup problem falls through to interpolation.
            pass

        # interpolation from dense historical series
        if dense_available and dense_start <= t <= dense_end:
            key = pd.to_datetime(t)
            try:
                values.append(historical_dense.at[key, column])
            except KeyError:
                # t is not on the 1-minute grid: insert it and interpolate in time.
                s = historical_dense[column].reindex(historical_dense.index.union([t])).sort_index().interpolate(method='time')
                values.append(s.at[key])
            continue

        values.append(float('nan'))

    series = pd.Series(data=values, index=aligned_index, dtype=float)
    # A window that spans midnight is not monotonic in time-of-day; sort so the
    # plotted line does not jump back across the whole axis.
    return series.sort_index(kind='stable')


def build_current_series_timeofday(df):
    """Return a copy of *df* whose index holds each row's time-of-day on today's date.

    The result is sorted by the new index so it can be drawn as one continuous
    line even when the original rows span midnight.
    """
    aligned = df.copy()
    aligned.index = pd.to_datetime([datetime.combine(now.date(), t.time()) for t in df.index])
    return aligned.sort_index(kind='stable')


def year_label_for_timestamp(ts):
    """Return the year of *ts* (not referenced elsewhere in the visible script)."""
    return ts.year


def _series_stats(s):
    """Return (min, max, mean) of *s* formatted with two decimals, or dashes if it has no data."""
    if s is None or s.dropna().empty:
        return ('-', '-', '-')
    return (f"{s.min():.2f}", f"{s.max():.2f}", f"{s.mean():.2f}")


def plot_with_history(df_current, column, ylabel, out_prefix):
    """Plot *column* of the recent data together with the 1- and 2-year-back series.

    All series are drawn over today's time-of-day axis, a Min/Max/Avg table is
    placed below the axes, and the figure is saved as '<out_prefix>.png'.

    Args:
        df_current: Recent (resampled) DataFrame indexed by timestamp.
        column: Column to plot.
        ylabel: Text used for the y-axis label and the title.
        out_prefix: Output file name without the '.png' extension.
    """
    current_aligned_df = build_current_series_timeofday(df_current[[column]])
    main_series = current_aligned_df[column]

    # Determine year labels
    year_now = now.year
    year_1 = (now - pd.DateOffset(years=1)).year
    year_2 = (now - pd.DateOffset(years=2)).year

    plt.figure(figsize=(12, 6))

    # Plot current year (use its actual year label)
    plt.plot(main_series.index, main_series.values,
             color=colors['current'], marker=marker, markersize=markersize,
             linewidth=linewidth, label=str(year_now))

    # Build historical series (aligned to time-of-day)
    hist1 = build_historical_series_with_interpolation(df_current.index, column, 1)
    hist2 = build_historical_series_with_interpolation(df_current.index, column, 2)

    # Draw a historical line only if it holds at least one real value.
    for color_key, series, year in (('1y', hist1, year_1), ('2y', hist2, year_2)):
        if series is not None and not series.dropna().empty:
            plt.plot(series.index, series.values,
                     color=colors[color_key], marker=marker, markersize=markersize,
                     linewidth=linewidth, label=str(year))

    # X-axis: only show hours 00:00 to 23:59, no label and ticks horizontal
    ax = plt.gca()
    ax.set_xlim(datetime.combine(now.date(), datetime.min.time()),
                datetime.combine(now.date(), datetime.max.time()))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))
    for label in ax.get_xticklabels():
        label.set_rotation(0)
        label.set_horizontalalignment('center')
    ax.set_xlabel('')  # remove 'Hour of day' label

    plt.title(f"{ylabel} - {now.strftime('%d.%m.%Y')}")
    plt.ylabel(ylabel)
    plt.grid()
    plt.legend(title='Year')

    # Build stats table: columns Min, Max, Avg; rows = years (YYYY)
    table_years = [str(year_now)]
    table_data = [_series_stats(main_series)]
    for series, year in ((hist1, year_1), (hist2, year_2)):
        if series is not None:
            table_years.append(str(year))
            table_data.append(_series_stats(series))

    # Create table as matplotlib table occupying full width at bottom of plot
    col_labels = ['Min', 'Max', 'Avg']
    # place table below plot: use bbox to span full width
    the_table = plt.table(cellText=table_data,
                          rowLabels=table_years,
                          colLabels=col_labels,
                          cellLoc='center',
                          rowLoc='center',
                          colLoc='center',
                          loc='bottom',
                          bbox=[0.0, -0.35, 1.0, 0.25])  # left, bottom, width, height
    the_table.auto_set_font_size(False)
    the_table.set_fontsize(10)
    plt.subplots_adjust(bottom=0.28)  # make room for the table

    outname = f"{out_prefix}.png"
    plt.savefig(outname, bbox_inches='tight')
    plt.close()
    print("Saved", outname)


# Generate plots
plot_with_history(recent_resampled, 'Temperature', 'Temperature (°C)', 'Temperature')
plot_with_history(recent_resampled, 'Pressure', 'Pressure (hPa)', 'Pressure')
plot_with_history(recent_resampled, 'Humidity', 'Humidity (%)', 'Humidity')