"""Summarise client types seen in today's Nextcloud Apache access log.

Reads the CSV access log for the current date, keeps the requests of the
last 24 hours, maps every raw User-Agent string to a coarse client label and
writes the per-label request counts as a JSON object to the metrics file.
"""
import json
import sys
from datetime import datetime, timedelta

import pandas as pd

# The date suffix (YYYYMMDD) is filled in at run time.
LOG_PATH_TEMPLATE = "/var/log/apache2/archive/nextcloud-access-csv.log-%s"
OUTPUT_PATH = "/var/lib/zabbix/output/apache2_user_agent_metrics.json"

## Sample User-Agent strings the browser rules below were written against
## EDGE
# Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57
## Chrome
# Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36
## Firefox
# Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:110.0) Gecko/20100101 Firefox/110.0
## Safari
# Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15


def get_norm_user_agent(s):
    """Map a raw User-Agent string to a coarse client label.

    Matching is case-insensitive and rule order matters: Edge also
    advertises "chrome", so the Edge token is tested inside the Chrome
    branch.

    Args:
        s: Raw User-Agent value. Non-string values (for example the float
            NaN pandas produces for an empty CSV field) are accepted.

    Returns:
        The label string, or None when ``s`` is not a string or no rule
        matches. None entries are dropped when the labels are counted.
    """
    # Empty CSV fields arrive as NaN; calling .lower() on them would raise.
    if not isinstance(s, str):
        return None

    s = s.lower()

    ## Nextcloud Talk App
    if "nextcloud-talk" in s:
        if "android" in s:
            return "Nextcloud_Talk_Android"
        if "ios" in s:
            return "Nextcloud_Talk_iOS"
        return "Nextcloud_Talk_unknown"

    ## Nextcloud Files App
    if "nextcloud-android" in s:
        return "Nextcloud_Android"

    ## Browser
    if "chrome" in s:
        return "Edge_browser" if "edg" in s else "Chrome_browser"
    if "firefox" in s:
        return "Firefox_browser"
    if "macintosh" in s and "applewebkit" in s:
        return "Safari_browser"

    ## Desktop Client
    if "mirall" in s:
        if "linux" in s:
            return "Desktop_Client_Linux"
        if "macintosh" in s:
            return "Desktop_Client_Mac"
        if "windows" in s:
            return "Desktop_Client_Windows"
        return "Desktop_Client_Unknown"

    return None


def count_user_agents(df, now, window=timedelta(hours=24)):
    """Count normalised user agents among the rows newer than ``now - window``.

    Args:
        df: DataFrame with a datetime ``timestamp`` column and a
            ``user_agent`` column.
        now: Reference time that the window is measured back from.
        window: Length of the look-back period.

    Returns:
        A dict mapping each label to its number of occurrences as a plain
        ``int``. Rows whose user agent has no label are not counted.
    """
    cutoff = now - window
    # Select the column straight from the filtered rows instead of adding a
    # column to a filtered slice, which triggers SettingWithCopyWarning.
    recent_agents = df.loc[df["timestamp"] > cutoff, "user_agent"]
    labels = recent_agents.map(get_norm_user_agent)
    # value_counts() drops missing values by default, i.e. the None labels.
    counts = labels.value_counts()
    return {label: int(count) for label, count in counts.items()}


def main():
    """Read today's access log and write the 24-hour user-agent metrics."""
    # Take the time once so the file name and the cutoff cannot disagree.
    now = datetime.now()
    dt = now.strftime("%Y%m%d")
    df = pd.read_csv(
        LOG_PATH_TEMPLATE % dt,
        header=None,
        names=["timestamp", "user_agent"],
        parse_dates=["timestamp"],
    )
    metrics = count_user_agents(df, now)
    # The context manager guarantees the file is flushed and closed.
    with open(OUTPUT_PATH, "w") as out_file:
        json.dump(metrics, out_file)


if __name__ == "__main__":
    main()