|
@@ -0,0 +1,277 @@
|
|
|
|
|
+# This script is largely vibecoded, changed by me, reviewed, and tested on my machine.
|
|
|
|
|
+# ####################################################################################
|
|
|
|
|
+# Reads sensor text files from a top-level folder ("temperature") and from historical subfolders ("2024", "2023"). Filenames are Unix timestamps like 1700000000.txt.
|
|
|
|
|
+# Each file is parsed for lines containing "Temperature", "Pressure", and "Humidity" and the numeric values after the colon. Only files that contain all three values are kept.
|
|
|
|
|
+# Loads recent data from the top-level folder for the last 24 hours (since yesterday). Exits if no recent data found.
|
|
|
|
|
+# Loads historical data from the specified subfolders, deduplicates identical timestamps by averaging, and builds a dense 1-minute interpolated historical timeseries when historical data exists.
|
|
|
|
|
+# Resamples the recent (last-24h) data to 5-minute intervals.
|
|
|
|
|
+# For each timestamp in the recent series, it attempts to build two historical series:
|
|
|
|
|
+# - 1 year back and 2 years back (same calendar time shifted by 1 and 2 years).
|
|
|
|
|
+# - For each target historical time it first tries an exact match in historical data; if none, it looks for the nearest timestamp within ±1 day; if still none, it attempts time interpolation from the dense 1-minute historical series. If all fail, it produces NaN.
|
|
|
|
|
+#
|
|
|
|
|
+# For each of Temperature, Pressure, and Humidity:
|
|
|
|
|
+# - Aligns recent data to today's date (time-of-day) and plots it (blue) for the last 24 hours resampled at 5 minutes.
|
|
|
|
|
+# - Plots the 1-year-back (orange) and 2-year-back (green) series aligned to the same time-of-day axis, if available.
|
|
|
|
|
+# - X-axis covers 00:00–23:59 of today with hour ticks every 2 hours.
|
|
|
|
|
+# - Adds a small stats table below the plot showing Min, Max, Avg for each available year (current, 1y, 2y).
|
|
|
|
|
+# - Saves each plot as Temperature.png, Pressure.png, Humidity.png and prints the saved filename.
|
|
|
|
|
+#
|
|
|
|
|
+# The current year is located in the folder temperature
|
|
|
|
|
+# all previous years are moved to folders temperature/YYYY
|
|
|
|
|
+# every 5th minute a new unixtimestamp.txt magically (it's another script) appears in the folder temperature,
|
|
|
|
|
+# this script runs at 23:56, when the last 5-minute interval of the day is over and all data
|
|
|
|
|
+# points for the day should have been collected.
|
|
|
|
|
+#
|
|
|
|
|
+# Because I developed all this stuff over a long period of time, the data is stored in text files with the following content
|
|
|
|
|
+# Temperature : 10.28 °C
|
|
|
|
|
+# Pressure : 979.6165 hPa
|
|
|
|
|
+# Humidity : 100 %
|
|
|
|
|
+#
|
|
|
|
|
+# So I had to deal with this now, knowing that it would be much easier to store all the data points in some database or even structured text files.
|
|
|
|
|
+# On the other hand, I could rewrite a lot of things, at the risk of breaking more than I want to.
|
|
|
|
|
+#
|
|
|
|
|
+
|
|
|
|
|
+import os
|
|
|
|
|
+import pandas as pd
|
|
|
|
|
+import matplotlib.pyplot as plt
|
|
|
|
|
+from datetime import datetime, timedelta
|
|
|
|
|
+import matplotlib.dates as mdates
|
|
|
|
|
+
|
|
|
|
|
# Config
data_folder = 'temperature'  # top-level folder holding the current year's files

# Reference "now" for the whole run: used for the 24 h window, the plot
# title, the year labels and the date every series is aligned to.
now = datetime.now()
yesterday = now - timedelta(days=1)

# Previous years are stored in <data_folder>/YYYY. Derive the two folders
# needed for the 1-year and 2-year comparison from the current year so the
# list does not go stale after New Year.
historical_folders = [str(now.year - years_back) for years_back in (1, 2)]

timestamp_tolerance = pd.Timedelta(days=1)  # nearest match allowed within ±1 day

# Line colours keyed by series: current data, 1 year back, 2 years back.
colors = {
    'current': 'tab:blue',
    '1y': 'tab:orange',
    '2y': 'tab:green',
}

# Shared line style for all plotted series.
marker = 'o'
linewidth = 2
markersize = 4
|
|
|
|
|
+
|
|
|
|
|
def read_folder(folder_path, since=None):
    """Load the sensor readings stored as ``<unix-timestamp>.txt`` in a folder.

    Every ``*.txt`` file whose name (without the extension) is an integer is
    treated as one reading taken at that Unix timestamp, converted to naive
    local time. Inside the file, lines containing ``Temperature``,
    ``Pressure`` or ``Humidity`` are parsed for the number following the
    first colon. A file is kept only if all three values could be parsed.

    Args:
        folder_path: Directory to scan (sub-directories are not visited).
        since: Optional ``datetime``; files whose timestamp is earlier are
            skipped without being opened.

    Returns:
        A DataFrame sorted by its ``DatetimeIndex`` (named ``Timestamp``)
        with float columns ``Temperature``, ``Pressure`` and ``Humidity``.
        If the folder does not exist or holds no usable file, the frame is
        empty but keeps the same columns, dtypes and index type.
    """
    fields = ('Temperature', 'Pressure', 'Humidity')

    def _parse_value(line):
        """Return the number after the first colon of *line*, or None."""
        try:
            return float(line.split(':')[1].split()[0])
        except (IndexError, ValueError):
            # No colon, nothing after it, or not a number.
            return None

    timestamps = []
    readings = {name: [] for name in fields}

    if os.path.isdir(folder_path):
        for fname in os.listdir(folder_path):
            if not fname.endswith('.txt'):
                continue
            try:
                dt = datetime.fromtimestamp(int(fname[:-4]))
            except (ValueError, OverflowError, OSError):
                # Not an integer name, or an integer outside the range the
                # platform can convert to a date.
                continue
            if since is not None and dt < since:
                continue

            parsed = dict.fromkeys(fields)
            # The files contain a degree sign; decode as UTF-8 and replace
            # undecodable bytes so a differently encoded unit symbol cannot
            # abort the run (the numbers themselves are plain ASCII).
            with open(os.path.join(folder_path, fname), 'r',
                      encoding='utf-8', errors='replace') as f:
                for line in f:
                    # First matching keyword wins for a line; a later line
                    # with the same keyword overrides an earlier one.
                    for name in fields:
                        if name in line:
                            parsed[name] = _parse_value(line)
                            break

            if any(value is None for value in parsed.values()):
                continue
            timestamps.append(dt)
            for name in fields:
                readings[name].append(parsed[name])

    frame = pd.DataFrame(
        readings,
        index=pd.DatetimeIndex(timestamps, name='Timestamp'),
        dtype='float64',
    )
    return frame.sort_index()
|
|
|
|
|
+
|
|
|
|
|
# Load recent data (last 24h) from top-level folder
recent_df = read_folder(data_folder, since=yesterday)
if recent_df.empty:
    print("No data found for the last 24 hours in the top-level folder.")
    # SystemExit rather than exit(): exit() is a helper injected by the
    # `site` module and is not guaranteed to exist in every interpreter setup.
    raise SystemExit

# Load historical data from subfolders. Empty frames (missing folder or no
# usable files) are left out so they cannot influence the dtypes of the
# concatenated result.
hist_frames = []
for sub in historical_folders:
    path = os.path.join(data_folder, sub)
    frame = read_folder(path)
    if not frame.empty:
        hist_frames.append(frame)

if hist_frames:
    historical_df = pd.concat(hist_frames).sort_index()
    # Deduplicate historical timestamps by averaging duplicates
    historical_df = historical_df.groupby(historical_df.index).mean()
else:
    historical_df = pd.DataFrame(columns=recent_df.columns)
    print("Warning: no historical data found in subfolders:", historical_folders)

# Resample recent to 5-minute intervals ('min' replaces the deprecated 'T' alias)
recent_resampled = recent_df.resample('5min').mean()

# Prepare dense historical series (1-minute resolution) for interpolation if we have historical data
if not historical_df.empty:
    hist_min = historical_df.index.min()
    hist_max = historical_df.index.max()
    dense_index = pd.date_range(start=hist_min, end=hist_max, freq='1min')
    # Interpolate on the union of real and 1-minute timestamps so the real
    # samples anchor the interpolation, then keep only the 1-minute grid.
    historical_dense = (
        historical_df
        .reindex(historical_df.index.union(dense_index))
        .sort_index()
        .interpolate(method='time')
        .reindex(dense_index)
    )
else:
    historical_dense = pd.DataFrame(columns=recent_resampled.columns)
|
|
|
|
|
+
|
|
|
|
|
def build_historical_series_with_interpolation(main_index, column, years_back):
    """Build the historical counterpart of a recent series, aligned to today.

    Each timestamp of *main_index* is shifted back by *years_back* calendar
    years. The value for a shifted timestamp is looked up in the module-level
    ``historical_df`` / ``historical_dense`` frames in this order:

    1. an exact timestamp match in ``historical_df``;
    2. the nearest ``historical_df`` timestamp, if it lies within
       ``timestamp_tolerance``;
    3. the value of ``historical_dense`` at that time, interpolated linearly
       in time between the two surrounding samples when the timestamp is not
       itself part of the dense index;
    4. otherwise NaN.

    Args:
        main_index: ``DatetimeIndex`` of the recent data.
        column: Name of the measurement column to read.
        years_back: Number of calendar years to shift back.

    Returns:
        A float Series with one value per entry of *main_index*. Its index
        is the time of day of each shifted timestamp combined with the date
        of the module-level ``now``.
    """
    import warnings  # local import: only needed for the best-effort fallback

    # Target times: same calendar time years_back earlier
    target_times = main_index - pd.DateOffset(years=years_back)
    nan = float('nan')

    # Loop-invariant state, computed once instead of per timestamp.
    have_history = not historical_df.empty
    have_dense = have_history and not historical_dense.empty
    if have_dense:
        dense_col = historical_dense[column]
        dense_first = dense_col.index.min()
        dense_last = dense_col.index.max()

    def _value_at(t):
        """Return the historical value for the single target time *t*."""
        if not have_history:
            return nan

        # 1) exact
        if t in historical_df.index:
            return historical_df.at[t, column]

        # 2) nearest within tolerance (best effort: a failing lookup must
        #    not abort the plot, but it should not go unnoticed either)
        try:
            pos = historical_df.index.get_indexer([t], method='nearest')[0]
            if abs(historical_df.index[pos] - t) <= timestamp_tolerance:
                return historical_df[column].iloc[pos]
        except Exception as exc:
            warnings.warn(
                f"Nearest-timestamp lookup failed ({exc!r}); "
                "falling back to interpolation."
            )

        # 3) interpolation from dense historical series
        if have_dense and dense_first <= t <= dense_last:
            try:
                return dense_col.at[t]
            except KeyError:
                # t lies between two dense samples: interpolate between just
                # those two instead of re-interpolating the whole series.
                right = int(dense_col.index.searchsorted(t))
                if 0 < right < len(dense_col):
                    t_lo, t_hi = dense_col.index[right - 1], dense_col.index[right]
                    v_lo, v_hi = dense_col.iloc[right - 1], dense_col.iloc[right]
                    return v_lo + (v_hi - v_lo) * ((t - t_lo) / (t_hi - t_lo))

        return nan

    values = [_value_at(t) for t in target_times]

    # Convert target_times to time-of-day aligned to today's date for plotting
    today = now.date()
    aligned_times = [datetime.combine(today, t.time()) for t in target_times]
    return pd.Series(data=values, index=pd.to_datetime(aligned_times), dtype='float64')
|
|
|
|
|
+
|
|
|
|
|
def build_current_series_timeofday(df):
    """Return a copy of *df* whose timestamps are moved onto today's date.

    Only the time of day of every index entry is kept; the date part is
    replaced by the date of the module-level ``now``. Row order, columns and
    values are unchanged, and *df* itself is not modified.

    Args:
        df: DataFrame indexed by timestamps.

    Returns:
        A new DataFrame with the same columns and values and a
        ``DatetimeIndex`` on today's date. An empty input yields an empty
        frame that still has the input's columns.
    """
    today = now.date()
    aligned = df.copy()
    # Replacing the index (rather than rebuilding the frame row by row)
    # keeps the columns and dtypes even when there are no rows.
    aligned.index = pd.DatetimeIndex(
        [datetime.combine(today, t.time()) for t in df.index]
    )
    return aligned
|
|
|
|
|
+
|
|
|
|
|
def year_label_for_timestamp(ts):
    """Return the calendar year of *ts*, read from its ``year`` attribute."""
    year = ts.year
    return year
|
|
|
|
|
+
|
|
|
|
|
def plot_with_history(df_current, column, ylabel, out_prefix):
    """Plot one measurement for today against the same day 1 and 2 years ago.

    The current data and, when they contain at least one value, the two
    historical series are drawn on a shared time-of-day axis spanning the
    date of the module-level ``now``. A table with Min/Max/Avg per year is
    placed below the axes and the figure is written to ``<out_prefix>.png``.

    Args:
        df_current: Recent data indexed by timestamp; must contain *column*.
        column: Name of the measurement column to plot.
        ylabel: Y-axis label, also used as the first part of the title.
        out_prefix: Output file name without the ``.png`` extension.

    Returns:
        None. Saves the image and prints the saved file name.
    """
    main_series = build_current_series_timeofday(df_current[[column]])[column]

    # Determine year labels
    year_now = now.year
    year_1 = (now - pd.DateOffset(years=1)).year
    year_2 = (now - pd.DateOffset(years=2)).year

    # Historical series (aligned to time-of-day) as (label, colour, series)
    history = [
        (str(year_1), colors['1y'],
         build_historical_series_with_interpolation(df_current.index, column, 1)),
        (str(year_2), colors['2y'],
         build_historical_series_with_interpolation(df_current.index, column, 2)),
    ]

    def series_stats(s):
        """Return (min, max, mean) of *s* as strings, or dashes if no data."""
        if s is None or s.dropna().empty:
            return ('-', '-', '-')
        return (f"{s.min():.2f}", f"{s.max():.2f}", f"{s.mean():.2f}")

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Plot current year (use its actual year label)
        ax.plot(main_series.index, main_series.values, color=colors['current'],
                marker=marker, markersize=markersize, linewidth=linewidth,
                label=str(year_now))

        # A historical line is drawn only if it has at least one real value.
        for year_label, color, series in history:
            if not series.dropna().empty:
                ax.plot(series.index, series.values, color=color,
                        marker=marker, markersize=markersize,
                        linewidth=linewidth, label=year_label)

        # X-axis: only show hours 00:00 to 23:59, no label and ticks horizontal
        ax.set_xlim(datetime.combine(now.date(), datetime.min.time()),
                    datetime.combine(now.date(), datetime.max.time()))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(0)
            tick_label.set_horizontalalignment('center')
        ax.set_xlabel('')  # remove 'Hour of day' label

        ax.set_title(f"{ylabel} - {now.strftime('%d.%m.%Y')}")
        ax.set_ylabel(ylabel)
        # Explicit True: a bare grid() call toggles the current state and
        # would switch the grid off if it is already enabled by default.
        ax.grid(True)
        ax.legend(title='Year')

        # Stats table: columns Min, Max, Avg; one row per year. Historical
        # years without data still get a row, filled with dashes.
        table_years = [str(year_now)] + [label for label, _, _ in history]
        table_data = [series_stats(main_series)]
        table_data += [series_stats(series) for _, _, series in history]

        # place table below plot: use bbox to span full width
        the_table = ax.table(cellText=table_data,
                             rowLabels=table_years,
                             colLabels=['Min', 'Max', 'Avg'],
                             cellLoc='center',
                             rowLoc='center',
                             colLoc='center',
                             loc='bottom',
                             bbox=[0.0, -0.35, 1.0, 0.25])  # left, bottom, width, height
        the_table.auto_set_font_size(False)
        the_table.set_fontsize(10)

        fig.subplots_adjust(bottom=0.28)  # make room for the table
        outname = f"{out_prefix}.png"
        fig.savefig(outname, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)
    print("Saved", outname)
|
|
|
|
|
+
|
|
|
|
|
# Generate plots: one PNG per measured quantity, named after its column.
for _column, _axis_label in (
    ('Temperature', 'Temperature (°C)'),
    ('Pressure', 'Pressure (hPa)'),
    ('Humidity', 'Humidity (%)'),
):
    plot_with_history(recent_resampled, _column, _axis_label, _column)
|