Enhanced and refactored 'collect_os_info.py'

Changed the script to a functional programming paradigm, removing the large number of local attributes in order to decrease memory usage when running it. Additional OS information is now included.
This commit is contained in:
Tareq Al-Ahdal
2022-08-13 06:13:05 +08:00
parent 5e477e3b5b
commit 49f889bf09
3 changed files with 167 additions and 252 deletions

View File

@@ -0,0 +1,167 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Collection of host system information including software versions, memory,
storage, and database configuration.
"""
import os
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Tuple, Union, cast
import psutil
from psycopg2.extensions import make_dsn, parse_dsn
from nominatim.config import Configuration
from nominatim.db.connection import connect
from nominatim.typing import DictCursorResults
from nominatim.version import version_str
def convert_version(ver_tup: Tuple[int, int]) -> str:
    """Render the version tuple (ver_tup) as a dot-separated string."""
    components = [str(component) for component in ver_tup]
    return ".".join(components)
def friendly_memory_string(mem: float) -> str:
    """Create a user friendly string for the amount of memory specified as mem.

    The value is scaled down in decimal steps of 1000 and rendered with
    one decimal place followed by the matching unit name.
    """
    mem_magnitude = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    mag = 0
    # Determine the order of magnitude. '>=' makes exactly 1000 bytes read as
    # "1.0 KB", and the bound on 'mag' keeps huge values on the largest unit
    # instead of indexing past the end of the tuple.
    while mem >= 1000 and mag < len(mem_magnitude) - 1:
        mem /= 1000
        mag += 1
    return f"{mem:.1f} {mem_magnitude[mag]}"
def run_command(cmd: Union[str, List[str]]) -> str:
    """Run a command and return the output it wrote to stdout.

    cmd is either a bare program name or an argument list. It is handed to
    subprocess.run() unchanged, so no shell is involved. The exit status of
    the command is not checked. Output is decoded as UTF-8; bytes that
    cannot be decoded are replaced instead of raising.

    When the program cannot be found, a placeholder string naming the
    command is returned instead of raising.
    """
    try:
        if sys.version_info < (3, 7):
            # 'capture_output' does not exist before Python 3.7
            cap_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)
        else:
            cap_out = subprocess.run(cmd, capture_output=True, check=False)
    except FileNotFoundError:
        # non-Linux system should end up here
        # Show an argument list as a plain command line, not as a list repr.
        cmd_text = cmd if isinstance(cmd, str) else " ".join(cmd)
        return f"Unknown (unable to find the '{cmd_text}' command)"
    return cap_out.stdout.decode("utf-8", errors="replace")
def os_name_info() -> str:
    """Return the operating system name (and possibly its version).

    The PRETTY_NAME entry of the first os-release file that exists is used.
    When no such file exists, or the entry is empty, Python's os.name is
    returned instead.
    """
    # man page os-release(5) details meaning of the fields;
    # the second path is the alternative location
    candidates = ("/etc/os-release", "/usr/lib/os-release")
    release_file = next((name for name in candidates if Path(name).is_file()), None)

    pretty_name: Optional[str] = None
    if release_file is not None:
        pretty_name = from_file_find_line_portion(release_file, "PRETTY_NAME", "=")

    # fallback on Python's os name
    # if this is insufficient, take a look at neofetch's approach to OS detection
    return pretty_name if pretty_name else os.name
# Note: Intended to be used on informational files like /proc
def from_file_find_line_portion(
    filename: str, start: str, sep: str, fieldnum: int = 1
) -> Optional[str]:
    """Open filename and find the line starting with the 'start' string.
    Split that line using the separator 'sep' and return field number
    'fieldnum' of the split, stripped of surrounding whitespace.

    When several lines match, the last usable one wins. Matching lines
    that do not have enough fields are skipped. An empty string is
    returned when no usable line is found.
    """
    result = ""
    with open(filename, encoding='utf8') as file:
        for line in file:
            if not line.startswith(start):
                continue
            fields = line.split(sep)
            # A matching line without the separator (or with too few fields)
            # must not abort the whole report with an IndexError.
            if -len(fields) <= fieldnum < len(fields):
                result = fields[fieldnum].strip()
    return result
def get_postgresql_config(version: int) -> str:
    """Return the content of the postgresql.conf file for the given version.

    A placeholder message naming the file is returned when it cannot be read.
    """
    conf_file = Path(f"/etc/postgresql/{version}/main/postgresql.conf")
    try:
        return conf_file.read_text(encoding='utf8')
    except OSError:
        return f"**Could not read '/etc/postgresql/{version}/main/postgresql.conf'**"
def report_system_information(config: Configuration) -> None:
    """Generate a report about the host system including software versions, memory,
    storage, and database configuration.

    The report is formatted as Markdown and printed to stdout. The database
    server is contacted through its 'postgres' database to obtain the
    PostgreSQL version and to check whether the database named in the
    configured DSN exists. The PostGIS version is only queried when it does.
    """
    dsn = config.get_libpq_dsn()

    with connect(make_dsn(dsn, dbname='postgres')) as conn:
        postgresql_ver: str = convert_version(conn.server_version_tuple())

        with conn.cursor() as cur:
            # The database name comes from the configuration. Pass it as a
            # query parameter so that quotes in the name can neither break
            # nor alter the statement.
            cur.execute("""
                SELECT datname FROM pg_catalog.pg_database
                WHERE datname=%s""", (parse_dsn(dsn)['dbname'],))
            nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())

    # The first connection is closed at this point; the second one gets its
    # own name instead of shadowing 'conn'.
    if nominatim_db_exists:
        with connect(dsn) as nominatim_conn:
            postgis_ver: str = convert_version(nominatim_conn.postgis_version_tuple())
    else:
        postgis_ver = "Unable to connect to database"

    postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))

    # Note: psutil.disk_partitions() is similar to run_command("lsblk")

    # Note: run_command("systemd-detect-virt") only works on Linux, on other OSes
    # should give a message: "Unknown (unable to find the 'systemd-detect-virt' command)"

    # Generates the Markdown report.
    report = f"""
**Instructions**
Use this information in your issue report at https://github.com/osm-search/Nominatim/issues
Redirect the output to a file:
$ ./collect_os_info.py > report.md
**Software Environment:**
- Python version: {sys.version}
- Nominatim version: {version_str()}
- PostgreSQL version: {postgresql_ver}
- PostGIS version: {postgis_ver}
- OS: {os_name_info()}
**Hardware Configuration:**
- RAM: {friendly_memory_string(psutil.virtual_memory().total)}
- number of CPUs: {psutil.cpu_count(logical=False)}
- bare metal/AWS/other cloud service (per systemd-detect-virt(1)): {run_command("systemd-detect-virt")}
- type and size of disks:
**`df -h` - df - report file system disk space usage: **
```
{run_command(["df", "-h"])}
```
**lsblk - list block devices: **
```
{run_command("lsblk")}
```
**Postgresql Configuration:**
```
{postgresql_config}
```
**Notes**
Please add any notes about anything above that is incorrect.
"""
    print(report)

View File

@@ -1,158 +0,0 @@
import os
from pathlib import Path
import subprocess
import sys
from typing import Optional, Union
# external requirement
import psutil
# from nominatim.version import NOMINATIM_VERSION
# from nominatim.db.connection import connect
class ReportSystemInformation:
    """Generate a report about the host system including software versions, memory,
    storage, and database configuration.

    All information is gathered once in __init__() and stored in attributes.
    report() only formats and prints these attributes.
    """

    def __init__(self):
        self._memory: int = psutil.virtual_memory().total
        self.friendly_memory: str = self._friendly_memory_string(self._memory)
        # psutil.cpu_count(logical=False) returns the number of CPU cores.
        # For number of logical cores (Hyperthreaded), call psutil.cpu_count() or os.cpu_count()
        self.num_cpus: int = psutil.cpu_count(logical=False)
        self.os_info: str = self._os_name_info()

        ### These are commented out because they have not been tested.
        # self.nominatim_ver: str = '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)
        # self._pg_version = conn.server_version_tuple()
        # self._postgis_version = conn.postgis_version_tuple()
        # self.postgresql_ver: str = self._convert_version(self._pg_version)
        # self.postgis_ver: str = self._convert_version(self._postgis_version)
        self.nominatim_ver: str = ""
        self.postgresql_ver: str = ""
        self.postgresql_config: str = ""
        self.postgis_ver: str = ""

        # the below commands run external programs to gather information
        self.disk_free: str = self._run_command(["df", "-h"])
        self.lsblk: str = self._run_command("lsblk")
        # psutil.disk_partitions() <- this function is similar to the above, but it is cross platform

        # Note: `systemd-detect-virt` command only works on Linux, on other OSes
        # should give a message: "Unknown (unable to find the 'systemd-detect-virt' command)"
        self.container_vm_env: str = self._run_command("systemd-detect-virt")

    def _convert_version(self, ver_tup: tuple) -> str:
        """converts tuple version (ver_tup) to a string representation"""
        return ".".join(map(str, ver_tup))

    def _friendly_memory_string(self, mem: int) -> str:
        """Create a user friendly string for the amount of memory specified as mem"""
        mem_magnitude = ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
        mag = 0
        # Determine the order of magnitude. '>=' makes exactly 1000 bytes read
        # as "1.0 KB"; the bound on 'mag' avoids indexing past the last unit.
        while mem >= 1000 and mag < len(mem_magnitude) - 1:
            mem /= 1000
            mag += 1

        return f"{mem:.1f} {mem_magnitude[mag]}"

    def _run_command(self, cmd: Union[str, list]) -> str:
        """Runs a command (without a shell) and returns the output from stdout.
        Returns a placeholder message when the command cannot be found."""
        try:
            if sys.version_info < (3, 7):
                # 'capture_output' does not exist before Python 3.7
                cap_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)
            else:
                cap_out = subprocess.run(cmd, capture_output=True, check=False)
            return cap_out.stdout.decode("utf-8")
        except FileNotFoundError:
            # non-Linux system should end up here
            # Show an argument list as a plain command line, not as a list repr.
            cmd_text = cmd if isinstance(cmd, str) else " ".join(cmd)
            return f"Unknown (unable to find the '{cmd_text}' command)"

    def _os_name_info(self) -> str:
        """Obtain Operating System Name (and possibly the version)"""

        os_info = None
        # man page os-release(5) details meaning of the fields
        if Path("/etc/os-release").is_file():
            os_info = self._from_file_find_line_portion("/etc/os-release", "PRETTY_NAME", "=")
        # alternative location
        elif Path("/usr/lib/os-release").is_file():
            os_info = self._from_file_find_line_portion("/usr/lib/os-release", "PRETTY_NAME", "=")

        # fallback on Python's os name
        if os_info is None or os_info == "":
            os_info = os.name

        # if the above is insufficient, take a look at neofetch's approach to OS detection
        return os_info

    # Note: Intended to be used on informational files like /proc
    def _from_file_find_line_portion(self, filename: str, start: str, sep: str,
                                     fieldnum: int = 1) -> Optional[str]:
        """Open filename and find the line starting with the 'start' string.
        Split that line using the separator 'sep' and return field number
        'fieldnum' of the split. When several lines match, the last usable
        one wins. Returns None when no usable line is found."""
        # Initialised up front so a file without a matching line yields None
        # instead of an UnboundLocalError at the return statement.
        result: Optional[str] = None
        with open(filename) as fh:
            for line in fh:
                if not line.startswith(start):
                    continue
                fields = line.split(sep)
                # skip matching lines that do not have the requested field
                if -len(fields) <= fieldnum < len(fields):
                    result = fields[fieldnum].strip()
        return result

    def report(self, out = sys.stdout, err = sys.stderr) -> None:
        """Generates the Markdown report.

        Optionally pass out or err parameters to redirect the output of stdout
        and stderr to other file objects."""

        # NOTE: This should only format the report. Any conversion or lookup
        # has to be done in __init__() or in another function.
        message = """
Use this information in your issue report at https://github.com/osm-search/Nominatim/issues
Copy and paste or redirect the output of the file:
$ ./collect_os_info.py > report.md
"""
        report = f"""
**Software Environment:**
- Python version: {sys.version}
- Nominatim version: {self.nominatim_ver}
- PostgreSQL version: {self.postgresql_ver}
- PostGIS version: {self.postgis_ver}
- OS: {self.os_info}
**Hardware Configuration:**
- RAM: {self.friendly_memory}
- number of CPUs: {self.num_cpus}
- bare metal/AWS/other cloud service (per systemd-detect-virt(1)): {self.container_vm_env}
- type and size of disks:
**`df -h` - df - report file system disk space usage: **
```
{self.disk_free}
```
**lsblk - list block devices: **
```
{self.lsblk}
```
**Postgresql Configuration:**
```
{self.postgresql_config}
```
**Notes**
Please add any notes about anything above that is incorrect.
"""

        print(message, file = err)
        print(report, file = out)
if __name__ == "__main__":
    # Script entry point: gather the system information, then print the report.
    ReportSystemInformation().report()

View File

@@ -1,94 +0,0 @@
#!/usr/bin/env bash
Description="The purpose of this script is to collect system information for bug reports.\n
Submit issues to https://github.com/osm-search/Nominatim/issues"
####### Gather the Information ##################################################
# Separate the information gathering from the report generation. Dividing these
# makes it easier to make trivial changes without having to learn the other
# portion of this script.
# Nominatim version
# NOTE: Getting this version will NOT work if the script is run from any folder
# other than Nominatim/utils. It calls python3 to import version.py locally and
# prints it in the version format.
NominatimVersion=`cd ../nominatim/ && python3 -c "import version; print('{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version.NOMINATIM_VERSION))"`
# PostgreSQL version
PostgreSQLVersion=`postgres --version`
if [ "$?" -ne "0" ]
then
PostgreSQLVersion="Not installed"
fi
# - PostGIS version:
# The command for this should look something like this:
# psql -U nominatim -d mydatabase -c 'SELECT PostGIS_full_version();'
# ASSUME the username is nominatim
# This needs to be run under the account with the appropriate permissions.
# This has been left blank.
PostGISVersion=
# There are different ways of getting the Linux OS information.
# https://www.cyberciti.biz/faq/how-to-check-os-version-in-linux-command-line/
# /etc/os-release has a number of representations of the OS
# PRETTY_NAME is the pretty (human-readable) one.
OperatingSystem=`grep '^PRETTY_NAME' /etc/os-release | cut -d'=' -f2`
RAM=`grep ^MemTotal /proc/meminfo | cut -d':' -f2`
# In /proc/cpuinfo: siblings seems to refer to total cores like hyperthreaded cores.
# The hyperthreaded cores could be included if that is needed.
NumCPUs=`grep '^cpu cores' /proc/cpuinfo | head -1 | cut -d':' -f2`
# - type and size of disks:
# could use `sudo fdisk -l` or `mount` to print this, but snaps have made this
# worse than useless with loop devices on Ubuntu.
# `df -h` - show the free space on drives
# `lsblk` - this tells you what the server has, which is not necessarily what
# this machine has. So in a container environment (like docker) this wouldn't
# be the correct report.
# This guide shows ways to get various storage device information: https://www.cyberciti.biz/faq/find-hard-disk-hardware-specs-on-linux/
# - bare metal/AWS/other cloud service:
# Unsure of how to detect this, but it might be useful for reporting disk storage.
# One option would be to prompt the user with something like this:
# Enter system configuration (1) bare metal (2) AWS (3) Other Cloud (4) Docker (5) Other: _
# ------ What do these commands do? -------------------------------------------
# "cut -d':' -f2" takes the line, splits it at the colon (:)
# and returns the portion in the second (2nd) "field"
#
# "head -1" returns only the first line of its input
#
####### Print the Markdown Report ######################################################
# 1>&2 redirects echo to print to stderr instead of stdout
echo 1>&2
echo -e $Description 1>&2
echo Copy and paste or redirect the output of the file: 1>&2
echo " \$ ./collect_os_info.sh > report.md" 1>&2
echo 1>&2
echo "**Software Environment (please complete the following information):**"
echo - Nominatim version: $NominatimVersion
echo - PostgreSQL version: $PostgreSQLVersion
echo - PostGIS version: $PostGISVersion
echo - OS: $OperatingSystem
echo
echo
echo "**Hardware Configuration (please correct the following information):**"
echo - RAM: $RAM
echo - number of CPUs: $NumCPUs
echo - type and size of disks:
echo - bare metal/AWS/other cloud service:
echo
echo
# Quoted so that the '*' characters are not treated as a glob pattern.
echo "**Postgresql Configuration:**"
echo