"""Aggregate Nextcloud access-log user agents into per-client counts.

Reads today's archived access log (a header-less CSV with the columns
``timestamp`` and ``user_agent``), maps every raw user-agent string to a
coarse client category and writes the number of requests per category as a
JSON object.
"""

import pandas as pd
from datetime import datetime
import sys
import json

# Archived access log; "%s" is replaced by the current date as YYYYMMDD.
LOG_PATH_TEMPLATE = "/var/log/apache2/archive/nextcloud-access-csv.log-%s"
# Destination of the aggregated per-client request counts.
OUTPUT_PATH = "/var/lib/zabbix/output/apache2_user_agent_metrics.json"

## Sample user agents of the browsers distinguished below.
## EDGE
# Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57
## Chrome
# Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36
## Firefox
# Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:110.0) Gecko/20100101 Firefox/110.0
## Safari
# Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15


def get_norm_user_agent(s):
    """Return the normalized client category for a raw user-agent value.

    Matching is case-insensitive and based on substring checks that are
    evaluated in a fixed order; the first matching rule wins.

    Args:
        s: Raw user-agent value. Non-string values (for example the NaN
            that pandas produces for an empty CSV field) are accepted.

    Returns:
        One of the category names such as ``"Chrome_browser"`` or
        ``"Desktop_Client_Linux"``; ``"other"`` when no rule matches or
        when ``s`` is not a string.
    """
    # Empty CSV fields arrive as float NaN, which has no .lower().
    if not isinstance(s, str):
        return "other"

    s = s.lower()

    ## Nextcloud Talk App
    if "nextcloud-talk" in s:
        if "android" in s:
            return "Nextcloud_Talk_Android"
        if "ios" in s:
            return "Nextcloud_Talk_iOS"
        return "Nextcloud_Talk_unknown"

    ## Nextcloud Files App
    if "nextcloud-android" in s:
        return "Nextcloud_Android"

    ## Browser
    if "chrome" in s:
        # Edge also advertises "Chrome"; its extra "Edg/" token tells them apart.
        if "edg" in s:
            return "Edge_browser"
        return "Chrome_browser"
    if "firefox" in s:
        return "Firefox_browser"
    # Chrome was handled above, so a remaining WebKit agent on a Mac is
    # counted as Safari.
    if "macintosh" in s and "applewebkit" in s:
        return "Safari_browser"

    ## Desktop Client
    if "mirall" in s:
        if "linux" in s:
            return "Desktop_Client_Linux"
        if "macintosh" in s:
            return "Desktop_Client_Mac"
        if "windows" in s:
            return "Desktop_Client_Windows"
        return "Desktop_Client_Unknown"

    ## DavX5, CardDAV / CalDAV sync agent on Android
    if "davx5" in s:
        return "DavX5"

    ## Thunderbird, CardDAV / CalDAV sync agent on Desktop
    if "thunderbird" in s:
        return "Thunderbird"

    ## Python scripting
    if "python" in s:
        return "python"

    return "other"


def main():
    """Read today's log, count requests per client category, write JSON."""
    # NOTE(review): uses the local, naive current date for the file suffix --
    # confirm this matches how the archive files are named.
    dt = datetime.now().strftime("%Y%m%d")

    df = pd.read_csv(
        LOG_PATH_TEMPLATE % dt,
        header=None,
        names=["timestamp", "user_agent"],
        parse_dates=["timestamp"],
    )

    df["norm_user_agent"] = df.user_agent.apply(get_norm_user_agent)
    df_agg = df.value_counts("norm_user_agent")

    # Context manager guarantees the file is flushed and closed.
    with open(OUTPUT_PATH, "w") as out_file:
        json.dump(df_agg.to_dict(), out_file)


if __name__ == "__main__":
    main()