Sfoglia il codice sorgente

'genarate_charts.py' hinzufügen

This script reads files in the folder temperature and plots beautiful curves of the past day compared to the same day last year.
Stephan Schneider 2 mesi fa
parent
commit
a8cedb86b9
1 ha cambiato i file con 277 aggiunte e 0 eliminazioni
  1. 277 0
      genarate_charts.py

+ 277 - 0
genarate_charts.py

@@ -0,0 +1,277 @@
+# This script is largely vibecoded, changed by me, reviewed, and tested on my machine.
+# ####################################################################################
+# Reads sensor text files from a top-level folder ("temperature") and from historical subfolders ("2024", "2023"). Filenames are Unix timestamps like 1700000000.txt.
+# Each file is parsed for lines containing "Temperature", "Pressure", and "Humidity" and the numeric values after the colon. Only files that contain all three values are kept.
+# Loads recent data from the top-level folder for the last 24 hours (since yesterday). Exits if no recent data found.
+# Loads historical data from the specified subfolders, deduplicates identical timestamps by averaging, and builds a dense 1-minute interpolated historical timeseries when historical data exists.
+# Resamples the recent (last-24h) data to 5-minute intervals.
+# For each timestamp in the recent series, it attempts to build two historical series:
+#    - 1 year back and 2 years back (same calendar time shifted by 1 and 2 years).
+#    - For each target historical time it first tries an exact match in historical data; if none, it looks for the nearest timestamp within ±1 day; if still none, it attempts time interpolation from the dense 1-minute historical series. If all fail, it produces NaN.
+#
+# For each of Temperature, Pressure, and Humidity:
+#    - Aligns recent data to today's date (time-of-day) and plots it (blue) for the last 24 hours resampled at 5 minutes.
+#    - Plots the 1-year-back (orange) and 2-year-back (green) series aligned to the same time-of-day axis, if available.
+#    - X-axis covers 00:00–23:59 of today with hour ticks every 2 hours.
+#    - Adds a small stats table below the plot showing Min, Max, Avg for each available year (current, 1y, 2y).
+#    - Saves each plot as Temperature.png, Pressure.png, Humidity.png and prints the saved filename.
+#
+# The current year is located in the folder temperature
+# all previous years are moved to folders temperature/YYYY
+# Every 5 minutes a new <unixtimestamp>.txt magically (it's another script) appears in the folder temperature.
+# This script runs at 23:56, after the last 5-minute sample of the day has been written, so all data points
+# for the day should have been collected by then.
+#
+# Because I developed all this stuff over a long period of time, the data is stored in text files with the following content
+# Temperature :  10.28 °C
+# Pressure    :  979.6165 hPa
+# Humidity    :  100 %
+# 
+# So I have to deal with this format now, knowing that it would be much easier to store all the data points in a database or in structured text files.
+# On the other hand, rewriting a lot of things would risk breaking more than I want to.
+#
+
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+from datetime import datetime, timedelta
+import matplotlib.dates as mdates
+
+# Config
+# Top-level folder that receives the current <unixtimestamp>.txt sensor files.
+data_folder = 'temperature'
+# Subfolders of data_folder that hold archived years.
+# NOTE(review): this list is hard-coded while the comparison years used for plotting are
+# derived from the current date -- presumably it has to be extended every January; confirm.
+historical_folders = ['2024', '2023']  # folders containing historical files
+
+# Reference instant of this run and the start of the 24-hour "recent" window.
+now = datetime.now()
+yesterday = now - timedelta(hours=24)
+
+# A historical sample only counts as a "nearest" match within this distance.
+timestamp_tolerance = pd.Timedelta(1, unit='D')  # nearest match allowed within ±1 day
+
+# Line styling shared by every plotted series; keys are looked up by the plotting code.
+colors = {'current': 'tab:blue', '1y': 'tab:orange', '2y': 'tab:green'}
+marker, linewidth, markersize = 'o', 2, 4
+
+def _parse_reading(line):
+    """Return the number following the first ':' in *line*, or None if it cannot be parsed."""
+    parts = line.split(':')
+    if len(parts) < 2:
+        return None
+    # Split on any whitespace so "10.28 °C" and tab-separated values both yield "10.28".
+    fields = parts[1].split()
+    if not fields:
+        return None
+    try:
+        return float(fields[0])
+    except ValueError:
+        return None
+
+
+def read_folder(folder_path, since=None):
+    """Read all ``<unixtimestamp>.txt`` sensor files of one folder into a DataFrame.
+
+    Each file is scanned for lines containing "Temperature", "Pressure" and
+    "Humidity"; the number after the colon is taken as the value. A file is
+    kept only if all three values could be parsed.
+
+    Parameters:
+        folder_path: directory to scan (non-recursive). A missing directory
+            yields an empty frame.
+        since: optional naive ``datetime``; files whose timestamp (converted
+            to local time) is older are skipped.
+
+    Returns:
+        DataFrame with float columns Temperature, Pressure and Humidity,
+        indexed by a DatetimeIndex named "Timestamp" and sorted by time.
+        The empty result has the same columns and index type, so it can be
+        concatenated with non-empty results without changing the index dtype.
+    """
+    columns = ['Temperature', 'Pressure', 'Humidity']
+    stamps = []
+    readings = {name: [] for name in columns}
+
+    if os.path.isdir(folder_path):
+        for fname in os.listdir(folder_path):
+            if not fname.endswith('.txt'):
+                continue
+            try:
+                dt = datetime.fromtimestamp(int(fname[:-4]))
+            except (ValueError, OverflowError, OSError):
+                # Not a timestamp-named file, or the number is outside the platform's range.
+                continue
+            if since is not None and dt < since:
+                continue
+
+            found = dict.fromkeys(columns)
+            # The files contain "°C": decode explicitly instead of relying on the locale
+            # default, and never fail on a stray byte since only the numbers matter.
+            with open(os.path.join(folder_path, fname), 'r', encoding='utf-8', errors='replace') as f:
+                for line in f:
+                    for name in columns:
+                        if name in line:
+                            # As before, a later line overrides an earlier one of the same kind.
+                            found[name] = _parse_reading(line)
+                            break
+
+            if all(value is not None for value in found.values()):
+                stamps.append(dt)
+                for name in columns:
+                    readings[name].append(found[name])
+
+    df = pd.DataFrame(readings, index=pd.DatetimeIndex(stamps, name='Timestamp'), dtype='float64')
+    return df.sort_index()
+
+# Load recent data (last 24h) from top-level folder
+recent_df = read_folder(data_folder, since=yesterday)
+if recent_df.empty:
+    print("No data found for the last 24 hours in the top-level folder.")
+    # The bare `exit()` helper is provided by the `site` module for interactive sessions
+    # and is not guaranteed to exist (e.g. `python -S`); raising SystemExit ends the
+    # script the same way and keeps the exit status at 0.
+    raise SystemExit(0)
+
+# Load historical data from subfolders
+hist_frames = []
+for sub in historical_folders:
+    path = os.path.join(data_folder, sub)
+    frame = read_folder(path)
+    # Skip missing/empty folders: concatenating empty frames adds nothing and can
+    # degrade the dtype of the combined index and columns.
+    if not frame.empty:
+        hist_frames.append(frame)
+
+if hist_frames:
+    historical_df = pd.concat(hist_frames).sort_index()
+    # Deduplicate historical timestamps by averaging duplicates
+    historical_df = historical_df.groupby(historical_df.index).mean()
+else:
+    historical_df = pd.DataFrame(columns=recent_df.columns)
+    print("Warning: no historical data found in subfolders:", historical_folders)
+
+# Resample recent to 5-minute intervals.
+# 'min' is the supported minute alias; the older 'T' spelling is deprecated in recent pandas.
+recent_resampled = recent_df.resample('5min').mean()
+
+# Prepare dense historical series (1-minute resolution) for interpolation if we have historical data
+if not historical_df.empty:
+    hist_min = historical_df.index.min()
+    hist_max = historical_df.index.max()
+    # The 1-minute grid starts at the first historical sample, so grid points carry that
+    # sample's seconds. 'min' is the supported minute alias; 'T' is deprecated in recent pandas.
+    dense_index = pd.date_range(start=hist_min, end=hist_max, freq='1min')
+    # Interpolate on the union of real samples and grid points, then keep only the grid.
+    historical_dense = historical_df.reindex(historical_df.index.union(dense_index)).sort_index().interpolate(method='time')
+    historical_dense = historical_dense.reindex(dense_index)
+else:
+    historical_dense = pd.DataFrame(columns=recent_resampled.columns)
+
+def _historical_sample(t, column):
+    """Return the historical value of *column* at *t*, or the nearest one within the tolerance.
+
+    Returns None when no sample qualifies.
+    """
+    if t in historical_df.index:
+        return historical_df.loc[t][column]
+    try:
+        pos = historical_df.index.get_indexer([t], method='nearest')[0]
+    except (ValueError, TypeError, pd.errors.InvalidIndexError):
+        # Index not usable for a nearest lookup (unsorted, duplicated or not datetime-like):
+        # treat as "no nearest match" and let the caller fall back to interpolation.
+        return None
+    if pos < 0:
+        return None
+    if abs(historical_df.index[pos] - t) > timestamp_tolerance:
+        return None
+    return historical_df.iloc[pos][column]
+
+
+def _dense_value(t, column):
+    """Return the value of *column* at *t* from the dense series; *t* must lie inside its range."""
+    try:
+        return historical_dense.at[pd.to_datetime(t), column]
+    except KeyError:
+        # t falls between two grid points: interpolate between just those two neighbours
+        # instead of re-interpolating the whole dense series for every single lookup.
+        pos = historical_dense.index.searchsorted(t)
+        bracket = historical_dense[column].iloc[max(pos - 1, 0):pos + 1]
+        bracket = bracket.reindex(bracket.index.union([t])).sort_index().interpolate(method='time')
+        return bracket.at[pd.to_datetime(t)]
+
+
+def build_historical_series_with_interpolation(main_index, column, years_back):
+    """Build the historical counterpart of *main_index* for one column.
+
+    Every timestamp of *main_index* is shifted back by *years_back* calendar
+    years. For each shifted time the value is taken, in this order, from an
+    exact match in ``historical_df``, from the nearest sample within
+    ``timestamp_tolerance``, or from ``historical_dense`` (interpolated in
+    time) when the time lies inside its range. Otherwise the value is NaN.
+
+    Parameters:
+        main_index: DatetimeIndex of the recent series.
+        column: column name to look up in the historical data.
+        years_back: number of calendar years to shift back.
+
+    Returns:
+        Series of the looked-up values, indexed by the shifted times'
+        time-of-day placed on today's date and sorted by that index, so a
+        window that crosses midnight does not plot as a backwards line.
+    """
+    # Target times: same calendar time years_back earlier
+    target_times = main_index - pd.DateOffset(years=years_back)
+
+    # Loop invariants: availability and bounds of the historical data.
+    have_history = not historical_df.empty
+    have_dense = have_history and not historical_dense.empty
+    if have_dense:
+        dense_start = historical_dense.index.min()
+        dense_end = historical_dense.index.max()
+
+    values = []
+    for t in target_times:
+        value = None
+        if have_history:
+            value = _historical_sample(t, column)
+            if value is None and have_dense and dense_start <= t <= dense_end:
+                value = _dense_value(t, column)
+        values.append(float('nan') if value is None else value)
+
+    # Convert target_times to time-of-day aligned to today's date for plotting
+    today = now.date()
+    aligned_times = [datetime.combine(today, t.time()) for t in target_times]
+    series = pd.Series(data=values, index=pd.to_datetime(aligned_times))
+    # Stable sort keeps the original order of points that share a time-of-day.
+    return series.sort_index(kind='mergesort')
+
+def build_current_series_timeofday(df):
+    """Return a copy of *df* re-indexed by time-of-day on today's date.
+
+    Each index timestamp keeps its time-of-day but is moved to ``now.date()``.
+    The result is sorted by the new index, so rows that originally belonged
+    to yesterday do not make the plotted line run backwards. Columns and
+    dtypes are preserved, also for an empty input.
+
+    Parameters:
+        df: DataFrame with a DatetimeIndex.
+
+    Returns:
+        DataFrame with the same columns and values, indexed by the aligned
+        datetimes.
+    """
+    today = now.date()
+    aligned = df.copy()
+    aligned.index = pd.to_datetime([datetime.combine(today, t.time()) for t in df.index])
+    # Stable sort keeps the original order of rows that share a time-of-day.
+    return aligned.sort_index(kind='mergesort')
+
+def year_label_for_timestamp(ts):
+    """Return the ``year`` attribute of *ts*, for use as a legend/table label."""
+    label = ts.year
+    return label
+
+def plot_with_history(df_current, column, ylabel, out_prefix):
+    """Plot one measurement for today together with the same day one and two years ago.
+
+    The current series and both historical series are drawn on a shared
+    time-of-day axis spanning today, a Min/Max/Avg table is placed below the
+    axes, and the figure is written to ``<out_prefix>.png``.
+
+    Parameters:
+        df_current: recent data indexed by timestamp; must contain *column*.
+        column: name of the column to plot.
+        ylabel: y-axis label, also used as the first part of the title.
+        out_prefix: output file name without the ``.png`` extension.
+    """
+    today = now.date()
+    main_series = build_current_series_timeofday(df_current[[column]])[column]
+
+    # Legend/table labels: this year and the two comparison years.
+    year_now = now.year
+    year_1 = (now - pd.DateOffset(years=1)).year
+    year_2 = (now - pd.DateOffset(years=2)).year
+
+    fig, ax = plt.subplots(figsize=(12, 6))
+    line_style = dict(marker=marker, markersize=markersize, linewidth=linewidth)
+
+    # Current year is always drawn.
+    ax.plot(main_series.index, main_series.values, color=colors['current'],
+            label=str(year_now), **line_style)
+
+    # Historical series (aligned to time-of-day) are drawn only when they hold data.
+    hist1 = build_historical_series_with_interpolation(df_current.index, column, 1)
+    hist2 = build_historical_series_with_interpolation(df_current.index, column, 2)
+    for color_key, year, hist in (('1y', year_1, hist1), ('2y', year_2, hist2)):
+        if hist is None or hist.dropna().empty:
+            continue
+        ax.plot(hist.index, hist.values, color=colors[color_key],
+                label=str(year), **line_style)
+
+    # X-axis: today from 00:00 to 23:59, a horizontal tick label every 2 hours, no axis label.
+    day_start = datetime.combine(today, datetime.min.time())
+    day_end = datetime.combine(today, datetime.max.time())
+    ax.set_xlim(day_start, day_end)
+    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
+    ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))
+    for tick_label in ax.get_xticklabels():
+        tick_label.set_rotation(0)
+        tick_label.set_horizontalalignment('center')
+    ax.set_xlabel('')
+
+    ax.set_title(f"{ylabel} - {now.strftime('%d.%m.%Y')}")
+    ax.set_ylabel(ylabel)
+    ax.grid()
+    ax.legend(title='Year')
+
+    def series_stats(s):
+        """Format (min, max, mean) of *s* with two decimals, or dashes when there is no data."""
+        if s is None or s.dropna().empty:
+            return ('-', '-', '-')
+        return (f"{s.min():.2f}", f"{s.max():.2f}", f"{s.mean():.2f}")
+
+    # Table rows: the current year first, then every historical series that was built.
+    rows = [(str(year_now), main_series)]
+    rows.extend((str(year), hist) for year, hist in ((year_1, hist1), (year_2, hist2))
+                if hist is not None)
+    table_years = [label for label, _ in rows]
+    table_data = [series_stats(series) for _, series in rows]
+
+    # The bbox (left, bottom, width, height in axes coordinates) puts the table
+    # below the axes across their full width.
+    the_table = ax.table(cellText=table_data,
+                         rowLabels=table_years,
+                         colLabels=['Min', 'Max', 'Avg'],
+                         cellLoc='center',
+                         rowLoc='center',
+                         colLoc='center',
+                         loc='bottom',
+                         bbox=[0.0, -0.35, 1.0, 0.25])
+    the_table.auto_set_font_size(False)
+    the_table.set_fontsize(10)
+
+    fig.subplots_adjust(bottom=0.28)  # make room for the table
+    outname = f"{out_prefix}.png"
+    fig.savefig(outname, bbox_inches='tight')
+    plt.close(fig)
+    print("Saved", outname)
+
+# Generate plots: one PNG per measurement as (column, y-axis label, output file prefix).
+for plot_column, plot_label, plot_prefix in (
+    ('Temperature', 'Temperature (°C)', 'Temperature'),
+    ('Pressure', 'Pressure (hPa)', 'Pressure'),
+    ('Humidity', 'Humidity (%)', 'Humidity'),
+):
+    plot_with_history(recent_resampled, plot_column, plot_label, plot_prefix)