# pretreatment (filtering, fixups), applied in order
# the syntax is meant to be suitable for inclusion into Apache config
# regexps must be in double quotes. Double quotes can be backslash-quoted.
# good reference about Python regexp: http://www.amk.ca/python/howto/regex/regex.html
#
# short intro to things that may be special to Python:
# (?: )         non-capturing group
# (?P<name> )   named group; referenced as \g<name> in a replacement string
# (FIXME: need to check if all these are supported in Apache)

#
# This directive applies only to the "offline parsing" script. Apache doesn't see
# the log line before it constructs and writes it at the end of request processing.
# Thus, Apache ignores this directive.
#
# It serves to
# 1) split a line of the log file into the relevant fragments
# 2) ignore log lines that don't match
#
# The expression needs to result into the following seven match groups:
# (IP, timestamp, url, status, referer, ua, country)
# FIXME: country should be optional, because it occurs only in a MirrorBrain logfile
#
# - - [23/Nov/2009:18:19:14 +0100] "GET /files/stable/3.1.1/OOo_3.1.1_MacOSXIntel_install_en-US.dmg HTTP/1.1" 302 399 "http://download.openoffice.org/all_rc.html" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 1.1.4322; .NET CLR 3.5.30729; .NET CLR 3.0.30618)" ftp.astral.ro r:country 913 844 EU:RO ASN:9050 P: size:24661382 -
# - - [04/Oct/2010:01:45:27 +0200] "GET /libreoffice/testing/LO_3.3.0-beta1_Win_x86_install_en-US.exe HTTP/1.1" 302 235 "http://www.documentfoundation.org/download/" "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv: Gecko/20100722 Firefox/3.6.8" want:file give:redirect r:country tdf.unixheads.org NA:US ASN:22773 P: 499 649 size:144783373 -
#
# 200 is returned for files that are not on mirrors, and for metalinks
#
# Active mask: includes the trailing country group (MirrorBrain log format).
StatsLogMask "^(\S+).+\[(.*?)\] \"GET (\S*) HTTP.*\" (200|302) [^\"]+ \"([^\"]*)\" \"([^\"]*)\".* .* \w\w:(\w\w) ASN:"
# Alternative mask without the country group (plain combined log format):
#StatsLogMask "^(\S+).+\[(.*?)\] \"GET (\S*) HTTP.*\" (200|302) [^\"]+ \"([^\"]*)\" \"([^\"]*)\".*"

# FIXME: we should have a separate directive that determines which status codes
# are considered for logging, which is read both by the script and by
# Apache
#
#StatsLogStatus 200
#StatsLogStatus 302

#
# Before doing anything else, silently ignore all files matching the following
# regular expression. (All others will be considered for counting.)
#
# ignore all requests that come with query string
StatsIgnoreMask ".*\?.*"
#
# ignore files with these endings
StatsIgnoreMask "^.*\.(md5|sha1|sha256|magnet|btih|mirrorlist|ico)$"
# ignore the sources
StatsIgnoreMask "^/libreoffice/src/.*$"
# ignore the complete "old" tree
StatsIgnoreMask "^/libreoffice/old/.*$"

# Ignore all requests from this host (string prefix match), and don't process
# the log lines at all.
#StatsIgnoreIP

#
# Drop recurring identical requests
#
# define the size of a sliding window for remembering the last requests,
# while parsing the log. Keyed by (ip, url, referer, user-agent),
# every request is checked whether it has been seen in identical form before.
StatsDupWindow 200
# FIXME: Apache will have to filter on time instead of number of requests,
# for practical reasons (memcached automates this nicely)

#
# Apply the following series of filters to the request URL
#
# strip prefixed protocol (normally only sent to proxies, but can occur in the wild)
# (the filter is applied to the requested url.)
StatsPrefilter "^http://[^/]+/" ""
# remove duplicated slashes
StatsPrefilter "/+" "/"
# strip leading base path
StatsPrefilter "^/libreoffice/" ""
# strip .metalink and other suffixes
StatsPrefilter "\.(metalink|meta4|torrent|zsync)$" ""

#
# StatsCount translates the remaining URL into the pieces to be logged
#
# NOTE(review): the names of the (?P<name>...) groups below were reconstructed
# after they had been lost from this file. A name only has to agree with the
# \g<name> reference inside the same directive -- confirm against the parser's
# original configuration if exact names matter elsewhere.
#
# OBSOLETE filenames:
#testing/LO_3.3.0-beta1_Win_x86_install_en-US.exe
#testing/LO_3.3.0-beta1_MacOS_x86_install_en-US.dmg
#testing/LO_3.3.0-beta1_Linux_x86-64_install-deb_en-US.tar.gz
#testing/LO_3.3.0-beta1_Linux_x86-64_install-rpm_en-US.tar.gz
#testing/LO_3.3.0-beta1_Linux_x86_install-deb_en-US.tar.gz
#testing/LO_3.3.0-beta1_Linux_x86_install-rpm_en-US.tar.gz
StatsCount "^testing/LO_([^_/]+)_(Win|MacOS|Linux)_(x86|x86-64)_(install|install-deb|install-rpm)_(?P<lang>[a-zA-Z]{2}(-[a-zA-Z]{2})?)\.(?:exe|dmg|tar\.gz|tar\.bz2)$" "prod: LO os: \2-\3 version: \1 lang: \g<lang>"

# stable/3.3.0/mac/x86/LibO_3.3.0_MacOS_x86_install_en-US.dmg
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_install_multi.exe
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_install_multi.exe
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_install_multi.exe
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_install_multi.exe
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_helppack_pt.exe
# stable/3.3.0/deb/x86_64/LibO_3.3.0_Linux_x86-64_helppack-deb_fa.tar.gz
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_install_multi.exe
# stable/3.3.0/deb/x86/LibO_3.3.0_Linux_x86_install-deb_en-US.tar.gz
# stable/3.3.0/win/x86/BrOffice_3.3.0_Win_x86_install_multi.exe
# stable/3.3.0/rpm/x86_64/LibO_3.3.0_Linux_x86-64_helppack-rpm_sd.tar.gz
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_helppack_sv.exe
# stable/3.3.0/win/x86/LibO_3.3.0_Win_x86_helppack_sw-TZ.exe
# (\2 is the version group: the version must appear both in the directory and
# in the file name.)
StatsCount "^(stable|testing)/(?P<version>[^/]+)/(deb|mac|rpm|win)/(x86|x86_64|ppc)/(?P<prod>LibO|BrOffice)_\2_(?P<os>Win|MacOS|Linux)_(?P<arch>x86|x86-64|PPC)_(install|langpack|helppack)(-rpm|-deb)?_(?P<lang>([a-zA-Z]{2}(-[a-zA-Z]{2})?|multi|all_lang|[a-zA-Z]{3}))\.(?:exe|dmg|tar\.gz|tar\.bz2)$" "prod: \g<prod> os: \g<os>-\g<arch> version: \g<version> lang: \g<lang>"

# portable/3.3.0/LibreOfficePortable_3.3.0.paf.exe
# portable/3.3.0/LibreOfficePortable_3.3.0.paf.exe.zsync
# (\1 is the version group)
StatsCount "^portable/(?P<version>[^/]+)/(?P<prod>LibreOfficePortable)_\1.paf.exe$" "prod: \g<prod> os: win version: \g<version> lang: multi"

# box/3.3.0/LibO-3.3.0-1_DVD_allplatforms_de.iso
# (\1 is the version group; the number after it is captured as "build")
StatsCount "^box/(?P<version>[^/]+)/LibO-\1-(?P<build>[0-9]+)_DVD_allplatforms_(?P<lang>([a-zA-Z]{2}(-[a-zA-Z]{2})?|multi|all_lang|[a-zA-Z]{3})).iso$" "prod: LibO-DVD os: allplatforms version: \g<version>-\g<build> lang: \g<lang>"

#
# Filters to be applied after parsing (but still before counting)
#
#StatsPostfilter "foo" "bar"
# strip the "prod: ", "os: ", "version: " and "lang: " labels that the
# StatsCount replacement strings insert
StatsPostfilter "(prod|os|version|lang): " ""
# FIXME: we didn't need to add those words in the first place; not needed at all.

# vim: ft=apache ai ts=4 sw=4 smarttab expandtab smarttab