diff --git a/.DS_Store b/.DS_Store
index 976d4e6..2d32235 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/clipper/analyser.py b/clipper/analyser.py
new file mode 100644
index 0000000..b2ff289
--- /dev/null
+++ b/clipper/analyser.py
@@ -0,0 +1,120 @@
+"""Detect peaks of chat activity in a recorded stream chat log."""
+
+import logging
+from datetime import datetime
+
+import matplotlib.pyplot as plt
+import numpy as np
+# Import the submodule explicitly: a bare ``import scipy`` does not
+# guarantee that ``scipy.signal`` is loaded on every SciPy version.
+import scipy.signal
+
+from clipper.chat import CHAT_DIVIDER
+
+logger = logging.getLogger(__name__)
+
+
+class ChatAnalyser:
+    """Find the minutes in which a chat log was most active."""
+
+    def run(self, chat_file, peaks_output_file, peaks_output_chart):
+        """Analyse ``chat_file`` and write the detected peaks to disk.
+
+        Args:
+            chat_file: Path of the chat log to read.
+            peaks_output_file: Path of the text file that receives one
+                peak minute (``YYYY-MM-DD HH:MM``) per line.
+            peaks_output_chart: Path of the messages-per-minute chart image.
+
+        Returns:
+            Indices of the peaks within the per-minute series, as returned
+            by ``scipy.signal.find_peaks_cwt``.
+        """
+        dates = self._read_message_dates(chat_file)
+        messages_per_minute = self._group_dates(dates)
+        peaks = self._find_peeks(messages_per_minute, peaks_output_file, peaks_output_chart)
+        logger.info("Found peaks: %s for file %s", peaks, chat_file)
+        return peaks
+
+    def _read_message_dates(self, chat_file):
+        """Return the timestamp of every well-formed line in ``chat_file``.
+
+        A line is well formed when splitting it on ``CHAT_DIVIDER`` yields
+        exactly three fields and the first one parses as a timestamp; any
+        other line is skipped.
+        """
+        dates = []
+        # NOTE(review): the file is opened with the platform default
+        # encoding, as before - confirm it matches how the log is written.
+        with open(chat_file, "r") as stream:
+            for line in stream:
+                message_data = line.split(CHAT_DIVIDER)
+                if len(message_data) != 3:
+                    # Wrong line format
+                    continue
+
+                try:
+                    dates.append(self._parse_date(message_data[0]))
+                except ValueError:
+                    # One corrupt timestamp must not abort the analysis.
+                    logger.warning("Skipping chat line with invalid date: %r", message_data[0])
+        return dates
+
+    def _parse_date(self, date_str):
+        """Parse a ``YYYY-MM-DD HH:MM:SS.ffffff`` timestamp string."""
+        return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.%f")
+
+    def _group_dates(self, dates):
+        """Count messages per minute.
+
+        Returns:
+            Dict mapping ``YYYY-MM-DD HH:MM`` to the number of dates in that
+            minute, in order of first appearance. Minutes without any
+            message are absent rather than mapped to zero.
+        """
+        groups = {}
+        for d in dates:
+            key = d.strftime("%Y-%m-%d %H:%M")
+            # Start at 1: the first message of a minute counts too.
+            groups[key] = groups.get(key, 0) + 1
+        return groups
+
+    def _find_peeks(self, messages_per_minute, peaks_output_file, peaks_output_chart):
+        """Locate activity peaks, then save the chart and the peak list.
+
+        Args:
+            messages_per_minute: Dict of ``YYYY-MM-DD HH:MM`` to count.
+            peaks_output_file: Path that receives one peak minute per line.
+            peaks_output_chart: Path that receives the plotted series.
+
+        Returns:
+            Array of indices into the per-minute series (empty when there
+            are no messages).
+        """
+        x_coordinates = list(messages_per_minute.keys())
+        y_coordinates = list(messages_per_minute.values())
+
+        if y_coordinates:
+            peak_indices = scipy.signal.find_peaks_cwt(np.array(y_coordinates), 1)
+        else:
+            # Nothing to analyse; do not hand SciPy an empty signal.
+            peak_indices = np.array([], dtype=int)
+
+        x_hours = [x.split(" ")[1] for x in x_coordinates]
+        fig, ax = plt.subplots()
+        try:
+            ax.plot(x_hours, y_coordinates)
+            fig.autofmt_xdate()
+            ax.set_xlabel("Time")
+            ax.set_ylabel("Count")
+            ax.set_title("Stream chat reaction")
+            fig.savefig(peaks_output_chart)
+        finally:
+            # pyplot keeps every figure alive until it is closed explicitly.
+            plt.close(fig)
+
+        peak_values = [x_coordinates[index] for index in peak_indices]
+        with open(peaks_output_file, "w") as stream:
+            stream.writelines(f"{peak}\n" for peak in peak_values)
+
+        return peak_indices
diff --git a/requirements.txt b/requirements.txt
index 1febc67..6c938e4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 requests==2.28.1
 streamlink==4.2.0
 twitchAPI==2.5.7
-irc==20.1.0
\ No newline at end of file
+irc==20.1.0
+scipy==1.9.0
+matplotlib==3.5.2
\ No newline at end of file