genarate_charts.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. # This script is largely vibecoded, changed by me, reviewed, and tested on my machine.
  2. # ####################################################################################
  3. # Reads sensor text files from a top-level folder ("temperature") and from historical subfolders ("2024", "2023"). Filenames are Unix timestamps like 1700000000.txt.
  4. # Each file is parsed for lines containing "Temperature", "Pressure", and "Humidity" and the numeric values after the colon. Only files that contain all three values are kept.
  5. # Loads recent data from the top-level folder for the last 24 hours (since yesterday). Exits if no recent data found.
  6. # Loads historical data from the specified subfolders, deduplicates identical timestamps by averaging, and builds a dense 1-minute interpolated historical timeseries when historical data exists.
  7. # Resamples the recent (last-24h) data to 5-minute intervals.
  8. # For each timestamp in the recent series, it attempts to build two historical series:
  9. # - 1 year back and 2 years back (same calendar time shifted by 1 and 2 years).
  10. # - For each target historical time it first tries an exact match in historical data; if none, it looks for the nearest timestamp within ±1 day; if still none, it attempts time interpolation from the dense 1-minute historical series. If all fail, it produces NaN.
  11. #
  12. # For each of Temperature, Pressure, and Humidity:
  13. # - Aligns recent data to today's date (time-of-day) and plots it (blue) for the last 24 hours resampled at 5 minutes.
  14. # - Plots the 1-year-back (orange) and 2-year-back (green) series aligned to the same time-of-day axis, if available.
  15. # - X-axis covers 00:00–23:59 of today with hour ticks every 2 hours.
  16. # - Adds a small stats table below the plot showing Min, Max, Avg for each available year (current, 1y, 2y).
  17. # - Saves each plot as Temperature.png, Pressure.png, Humidity.png and prints the saved filename.
  18. #
  19. # The current year is located in the folder temperature
  20. # all previous years are moved to folders temperature/YYYY
  21. # every 5th minute a new unixtimestamp.txt magically (it's another script) appears in the folder temperature,
  22. # this script runs at 23:56, when the last 5-minute slot of the day is over and all data points
  23. # for the day should have been collected.
  24. #
  25. # Because I developed all this stuff over a long period of time, the data is stored in text files with the following content
  26. # Temperature : 10.28 °C
  27. # Pressure : 979.6165 hPa
  28. # Humidity : 100 %
  29. #
  30. # So I had to deal with this now, knowing that it would be much easier to store all the data points in some database or even structured text files.
  31. # On the other hand I could rewrite a lot of things, but I would risk breaking more things than I want to.
  32. #
  33. import os
  34. import pandas as pd
  35. import matplotlib.pyplot as plt
  36. from datetime import datetime, timedelta
  37. import matplotlib.dates as mdates
  38. # Config
  39. data_folder = 'temperature'
  40. historical_folders = ['2024', '2023'] # folders containing historical files
  41. now = datetime.now()
  42. yesterday = now - timedelta(days=1)
  43. timestamp_tolerance = pd.Timedelta(days=1) # nearest match allowed within ±1 day
  44. colors = {
  45. 'current': 'tab:blue',
  46. '1y': 'tab:orange',
  47. '2y': 'tab:green'
  48. }
  49. marker = 'o'
  50. linewidth = 2
  51. markersize = 4
  52. def read_folder(folder_path, since=None):
  53. timestamps, temps, pres, hums = [], [], [], []
  54. if not os.path.isdir(folder_path):
  55. return pd.DataFrame(columns=['Timestamp','Temperature','Pressure','Humidity']).set_index('Timestamp')
  56. for fname in os.listdir(folder_path):
  57. if not fname.endswith('.txt'):
  58. continue
  59. try:
  60. ts = int(fname[:-4])
  61. except ValueError:
  62. continue
  63. dt = datetime.fromtimestamp(ts)
  64. if since is not None and dt < since:
  65. continue
  66. t_val = p_val = h_val = None
  67. with open(os.path.join(folder_path, fname), 'r') as f:
  68. for line in f:
  69. if 'Temperature' in line:
  70. try:
  71. t_val = float(line.split(':')[1].strip().split(' ')[0])
  72. except:
  73. t_val = None
  74. elif 'Pressure' in line:
  75. try:
  76. p_val = float(line.split(':')[1].strip().split(' ')[0])
  77. except:
  78. p_val = None
  79. elif 'Humidity' in line:
  80. try:
  81. h_val = float(line.split(':')[1].strip().split(' ')[0])
  82. except:
  83. h_val = None
  84. if t_val is not None and p_val is not None and h_val is not None:
  85. timestamps.append(dt)
  86. temps.append(t_val)
  87. pres.append(p_val)
  88. hums.append(h_val)
  89. df = pd.DataFrame({'Timestamp': timestamps, 'Temperature': temps, 'Pressure': pres, 'Humidity': hums})
  90. if df.empty:
  91. return df.set_index('Timestamp')
  92. df.set_index('Timestamp', inplace=True)
  93. df.sort_index(inplace=True)
  94. return df
  95. # Load recent data (last 24h) from top-level folder
  96. recent_df = read_folder(data_folder, since=yesterday)
  97. if recent_df.empty:
  98. print("No data found for the last 24 hours in the top-level folder.")
  99. exit()
  100. # Load historical data from subfolders
  101. hist_frames = []
  102. for sub in historical_folders:
  103. path = os.path.join(data_folder, sub)
  104. hist_frames.append(read_folder(path))
  105. historical_df = pd.concat(hist_frames).sort_index() if hist_frames else pd.DataFrame(columns=recent_df.columns)
  106. if historical_df.empty:
  107. print("Warning: no historical data found in subfolders:", historical_folders)
  108. # Deduplicate historical timestamps by averaging duplicates
  109. if not historical_df.empty:
  110. historical_df = historical_df.groupby(historical_df.index).mean()
  111. # Resample recent to 5-minute intervals
  112. recent_resampled = recent_df.resample('5T').mean()
  113. # Prepare dense historical series (1-minute resolution) for interpolation if we have historical data
  114. if not historical_df.empty:
  115. hist_min = historical_df.index.min()
  116. hist_max = historical_df.index.max()
  117. dense_index = pd.date_range(start=hist_min, end=hist_max, freq='1T')
  118. historical_dense = historical_df.reindex(historical_df.index.union(dense_index)).sort_index().interpolate(method='time')
  119. historical_dense = historical_dense.reindex(dense_index)
  120. else:
  121. historical_dense = pd.DataFrame(columns=recent_resampled.columns)
  122. def build_historical_series_with_interpolation(main_index, column, years_back):
  123. # Target times: same calendar time years_back earlier
  124. target_times = main_index - pd.DateOffset(years=years_back)
  125. values = []
  126. for t in target_times:
  127. if historical_df.empty:
  128. values.append(float('nan'))
  129. continue
  130. # exact
  131. if t in historical_df.index:
  132. values.append(historical_df.loc[t][column])
  133. continue
  134. # nearest within tolerance
  135. try:
  136. pos = historical_df.index.get_indexer([t], method='nearest')[0]
  137. nearest_ts = historical_df.index[pos]
  138. if abs(nearest_ts - t) <= timestamp_tolerance:
  139. values.append(historical_df.iloc[pos][column])
  140. continue
  141. except Exception:
  142. pass
  143. # interpolation from dense historical series
  144. if not historical_dense.empty and (t >= historical_dense.index.min() and t <= historical_dense.index.max()):
  145. try:
  146. interp_val = historical_dense.at[pd.to_datetime(t), column]
  147. values.append(interp_val)
  148. continue
  149. except KeyError:
  150. s = historical_dense[column].reindex(historical_dense.index.union([t])).sort_index().interpolate(method='time')
  151. values.append(s.at[pd.to_datetime(t)])
  152. continue
  153. values.append(float('nan'))
  154. # Convert target_times to time-of-day aligned to today's date for plotting
  155. aligned_times = []
  156. for t in target_times:
  157. tod = t.time()
  158. aligned_dt = datetime.combine(now.date(), tod)
  159. aligned_times.append(aligned_dt)
  160. series = pd.Series(data=values, index=pd.to_datetime(aligned_times))
  161. return series
  162. def build_current_series_timeofday(df):
  163. aligned_times = []
  164. values = []
  165. for t, row in df.iterrows():
  166. tod = t.time()
  167. aligned_dt = datetime.combine(now.date(), tod)
  168. aligned_times.append(aligned_dt)
  169. values.append(row)
  170. return pd.DataFrame(values, index=pd.to_datetime(aligned_times))
  171. def year_label_for_timestamp(ts):
  172. return ts.year
  173. def plot_with_history(df_current, column, ylabel, out_prefix):
  174. current_aligned_df = build_current_series_timeofday(df_current[[column]])
  175. main_series = current_aligned_df[column]
  176. # Determine year labels
  177. year_now = now.year
  178. year_1 = (now - pd.DateOffset(years=1)).year
  179. year_2 = (now - pd.DateOffset(years=2)).year
  180. plt.figure(figsize=(12, 6))
  181. # Plot current year (use its actual year label)
  182. plt.plot(main_series.index, main_series.values, color=colors['current'],
  183. marker=marker, markersize=markersize, linewidth=linewidth, label=str(year_now))
  184. # Build historical series (aligned to time-of-day)
  185. hist1 = build_historical_series_with_interpolation(df_current.index, column, 1)
  186. hist2 = build_historical_series_with_interpolation(df_current.index, column, 2)
  187. if hist1 is not None and not hist1.dropna().empty:
  188. plt.plot(hist1.index, hist1.values, color=colors['1y'],
  189. marker=marker, markersize=markersize, linewidth=linewidth, label=str(year_1))
  190. if hist2 is not None and not hist2.dropna().empty:
  191. plt.plot(hist2.index, hist2.values, color=colors['2y'],
  192. marker=marker, markersize=markersize, linewidth=linewidth, label=str(year_2))
  193. # X-axis: only show hours 00:00 to 23:59, no label and ticks horizontal
  194. ax = plt.gca()
  195. ax.set_xlim(datetime.combine(now.date(), datetime.min.time()),
  196. datetime.combine(now.date(), datetime.max.time()))
  197. ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
  198. ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))
  199. for label in ax.get_xticklabels():
  200. label.set_rotation(0)
  201. label.set_horizontalalignment('center')
  202. ax.set_xlabel('') # remove 'Hour of day' label
  203. plt.title(f"{ylabel} - {now.strftime('%d.%m.%Y')}")
  204. plt.ylabel(ylabel)
  205. plt.grid()
  206. plt.legend(title='Year')
  207. # Build stats table: columns Min, Max, Avg; rows = years (YYYY)
  208. table_years = []
  209. table_data = []
  210. def series_stats(s):
  211. if s is None or s.dropna().empty:
  212. return ('-', '-', '-')
  213. return (f"{s.min():.2f}", f"{s.max():.2f}", f"{s.mean():.2f}")
  214. # Current year
  215. table_years.append(str(year_now))
  216. table_data.append(series_stats(main_series))
  217. # 1y
  218. if hist1 is not None:
  219. table_years.append(str(year_1))
  220. table_data.append(series_stats(hist1))
  221. # 2y
  222. if hist2 is not None:
  223. table_years.append(str(year_2))
  224. table_data.append(series_stats(hist2))
  225. # Create table as matplotlib table occupying full width at bottom of plot
  226. col_labels = ['Min', 'Max', 'Avg']
  227. cell_text = table_data
  228. # place table below plot: use bbox to span full width
  229. the_table = plt.table(cellText=cell_text,
  230. rowLabels=table_years,
  231. colLabels=col_labels,
  232. cellLoc='center',
  233. rowLoc='center',
  234. colLoc='center',
  235. loc='bottom',
  236. bbox=[0.0, -0.35, 1.0, 0.25]) # left, bottom, width, height
  237. the_table.auto_set_font_size(False)
  238. the_table.set_fontsize(10)
  239. plt.subplots_adjust(bottom=0.28) # make room for the table
  240. outname = f"{out_prefix}.png"
  241. plt.savefig(outname, bbox_inches='tight')
  242. plt.close()
  243. print("Saved", outname)
  244. # Generate plots
  245. plot_with_history(recent_resampled, 'Temperature', 'Temperature (°C)', 'Temperature')
  246. plot_with_history(recent_resampled, 'Pressure', 'Pressure (hPa)', 'Pressure')
  247. plot_with_history(recent_resampled, 'Humidity', 'Humidity (%)', 'Humidity')