from pydatajson.helpers import is_working_url
from pydatajson.readers import read_catalog
from pydatajson.reporting import generate_datasets_summary
class StatusIndicatorsGenerator(object):
    """Compute status indicators for a data catalog.

    Dataset and distribution counts are derived from the per-dataset
    summary returned by ``generate_datasets_summary``. Download URL
    indicators walk the catalog's ``dataset`` / ``distribution`` entries
    and check each ``downloadURL`` with ``is_working_url``.
    """

    def __init__(self, catalog, validator=None):
        """Load the catalog and build its dataset summary.

        Args:
            catalog: Passed to ``read_catalog``; the result is stored in
                ``self.catalog``.
            validator: Forwarded unchanged to
                ``generate_datasets_summary``.
        """
        # Cached number of working download URLs. ``None`` means "not
        # computed yet"; ``0`` is a valid cached value.
        self.download_url_ok = None
        self.catalog = read_catalog(catalog)
        self.summary = generate_datasets_summary(self.catalog,
                                                 validator=validator)

    def datasets_cant(self):
        """Return the number of entries in the dataset summary."""
        return len(self.summary)

    def distribuciones_cant(self):
        """Return the sum of ``cant_distribuciones`` over the summary."""
        return sum(ds['cant_distribuciones'] for ds in self.summary)

    def datasets_con_datos_cant(self):
        """Return how many summary entries have ``tiene_datos == 'SI'``."""
        return sum(ds['tiene_datos'] == 'SI' for ds in self.summary)

    def datasets_sin_datos_cant(self):
        """Return how many summary entries have ``tiene_datos == 'NO'``."""
        return sum(ds['tiene_datos'] == 'NO' for ds in self.summary)

    def datasets_con_datos_pct(self):
        """Return the ratio of datasets with data, rounded to 4 decimals.

        Returns ``None`` when the summary is empty.
        """
        return self._get_dataset_percentage(self.datasets_con_datos_cant)

    def distribuciones_download_url_ok_cant(self):
        """Return the number of distributions whose download URL works.

        The count is computed on first use and cached in
        ``self.download_url_ok``.
        """
        # Compare against None explicitly: a cached count of 0 is falsy,
        # and a truthiness check would re-validate every URL on each call.
        if self.download_url_ok is None:
            return self._validate_download_urls()
        return self.download_url_ok

    def distribuciones_download_url_error_cant(self):
        """Return the number of distributions whose download URL fails.

        Computed as the total distribution count minus the working count.
        """
        return (self.distribuciones_cant() -
                self.distribuciones_download_url_ok_cant())

    def distribuciones_download_url_ok_pct(self):
        """Return the ratio of working download URLs, rounded to 4 decimals.

        Returns ``None`` when there are no distributions.
        """
        total = self.distribuciones_cant()
        if not total:
            return None
        # float() keeps true division under Python 2 as well.
        ok_count = float(self.distribuciones_download_url_ok_cant())
        return round(ok_count / total, 4)

    def _validate_download_urls(self):
        """Check every distribution's ``downloadURL`` and cache the count.

        A missing ``downloadURL`` is checked as the empty string. The
        value returned by ``is_working_url`` is added to the running
        total, so it is presumably a bool/int -- confirm against the
        helper.

        Returns:
            The accumulated count, also stored in ``self.download_url_ok``.
        """
        result = 0
        for dataset in self.catalog.get('dataset', []):
            for distribution in dataset.get('distribution', []):
                valid = is_working_url(distribution.get('downloadURL', ''))
                result += valid
        # Cache the result once it has been computed.
        self.download_url_ok = result
        return result

    def _get_dataset_percentage(self, indicator):
        """Return ``indicator()`` divided by the dataset count.

        Args:
            indicator: Zero-argument callable returning a count.

        Returns:
            The ratio rounded to 4 decimals, or ``None`` when there are
            no datasets (``indicator`` is not called in that case).
        """
        total = self.datasets_cant()
        if not total:
            return None
        return round(float(indicator()) / total, 4)