From 46d468ea2dc44f583221916640f4e9906d3db5d8 Mon Sep 17 00:00:00 2001 From: pierreaubert <1709343+pierreaubert@users.noreply.github.com> Date: Thu, 30 Nov 2023 18:58:23 +0100 Subject: [PATCH] fix: changes due to metadata name changes --- generate_common.py | 18 +++++ generate_eq_compare.py | 8 +- generate_html.py | 152 ++++++++++++++++++++++--------------- generate_meta.py | 89 +++++++++++++--------- generate_peqs.py | 2 +- generate_radar.py | 8 +- generate_stats.py | 8 +- src/website/brands.html | 1 + src/website/reviewers.html | 15 ++-- update_brands.sh | 2 +- update_website.sh | 16 ++-- 11 files changed, 191 insertions(+), 128 deletions(-) diff --git a/generate_common.py b/generate_common.py index 98bf8f1a1..f1c0a484b 100644 --- a/generate_common.py +++ b/generate_common.py @@ -24,6 +24,7 @@ import logging import os import pathlib +import re import sys import warnings @@ -33,6 +34,7 @@ import datas.metadata as metadata from spinorama import ray_setup_logger +import spinorama.constant_paths as cpaths MINIRAY = None try: @@ -356,3 +358,19 @@ def sort_meta_score(s): reverse=True, ) return {k: meta[k] for k in keys_sorted_score} + + +def find_metadata_file(): + pattern = "{}-[0-9a-f]*.json".format(cpaths.CPATH_METADATA_JSON[:-5]) + json_filenames = glob(pattern) + # print('DEBUG: {}'.format(json_filenames)) + json_filename = None + for json_maybe in json_filenames: + check = re.match(".*[-][0-9a-f]{5}[.]json$", json_maybe) + if check is not None: + json_filename = json_maybe + break + if json_filename is not None and os.path.exists(json_filename): + return json_filename + + return None diff --git a/generate_eq_compare.py b/generate_eq_compare.py index 75f9f43b6..c77707969 100755 --- a/generate_eq_compare.py +++ b/generate_eq_compare.py @@ -36,7 +36,7 @@ from docopt import docopt import numpy as np -from generate_common import get_custom_logger, args2level +from generate_common import get_custom_logger, args2level, find_metadata_file from spinorama.constant_paths 
import CPATH_METADATA_JSON, CPATH_DOCS_SPEAKERS, CPATH_DATAS_EQ from spinorama.need_update import need_update from spinorama.pict import write_multiformat @@ -71,9 +71,9 @@ def print_eq_compare(data, force): def main(force): # load all metadata from generated json file - json_filename = CPATH_METADATA_JSON - if not os.path.exists(json_filename): - logger.error("Cannot find %s", json_filename) + json_filename = find_metadata_file() + if json_filename is None: + logger.error("Cannot find metadata file!") sys.exit(1) jsmeta = None diff --git a/generate_html.py b/generate_html.py index f450426eb..1d31e1d80 100755 --- a/generate_html.py +++ b/generate_html.py @@ -42,8 +42,9 @@ from datas.metadata import speakers_info as extradata from generate_common import ( - get_custom_logger, args2level, + get_custom_logger, + find_metadata_file, sort_metadata_per_score, sort_metadata_per_date, ) @@ -99,55 +100,93 @@ def write_if_different(new_content, filename: str): ] +def generate_measurement( + dataframe, + meta, + site, + use_search, + speaker_name, + origins, + speaker_html, + graph_html, + origin, + measurements, + key, + dfs, +): + logger.debug("generate %s %s %s", speaker_name, origin, key) + freq = {k: dfs[k] for k in FREQ_FILTER if k in dfs} + contour = {k: dfs[k] for k in CONTOUR_FILTER if k in dfs} + radar = {k: dfs[k] for k in RADAR_FILTER if k in dfs} + # eq + eq = None + if key != "default_eq": + eq_filter = [ + "ref_vs_eq", + ] + eq = {k: dfs[k] for k in eq_filter if k in dfs} + # get index.html filename + dirname = "{}/{}/".format(cpaths.CPATH_DOCS_SPEAKERS, speaker_name) + if origin in ("ASR", "Princeton", "ErinsAudioCorner", "Misc"): + dirname += origin + else: + dirname += meta[speaker_name]["brand"] + index_name = "{0}/index_{1}.html".format(dirname, key) + + # write index.html + logger.info("Writing %s for %s", index_name, speaker_name) + speaker_content = speaker_html.render( + speaker=speaker_name, + g_freq=freq, + g_contour=contour, + g_radar=radar, + 
g_key=key, + g_eq=eq, + meta=meta, + origin=origin, + site=site, + use_search=use_search, + ) + write_if_different(speaker_content, index_name) + + # write a small file per graph to render the json generated by Vega + for kind in [freq, contour, radar]: + for graph_name in kind: + graph_filename = "{0}/{1}/{2}.html".format(dirname, key, graph_name) + logger.info("Writing %s/%s for %s", key, graph_filename, speaker_name) + graph_content = graph_html.render( + speaker=speaker_name, graph=graph_name, meta=meta, site=site + ) + write_if_different(graph_content, graph_filename) + + def generate_speaker( dataframe, meta, site, use_search, speaker_name, origins, speaker_html, graph_html ): for origin, measurements in origins.items(): for key, dfs in measurements.items(): - logger.debug("generate %s %s %s", speaker_name, origin, key) - freq = {k: dfs[k] for k in FREQ_FILTER if k in dfs} - contour = {k: dfs[k] for k in CONTOUR_FILTER if k in dfs} - radar = {k: dfs[k] for k in RADAR_FILTER if k in dfs} - # eq - eq = None - if key != "default_eq": - eq_filter = [ - "ref_vs_eq", - ] - eq = {k: dfs[k] for k in eq_filter if k in dfs} - # get index.html filename - dirname = cpaths.CPATH_DOCS_SPEAKERS + "/" + speaker_name + "/" - if origin in ("ASR", "Princeton", "ErinsAudioCorner", "Misc"): - dirname += origin - else: - dirname += meta[speaker_name]["brand"] - index_name = "{0}/index_{1}.html".format(dirname, key) - - # write index.html - logger.info("Writing %s for %s", index_name, speaker_name) - speaker_content = speaker_html.render( - speaker=speaker_name, - g_freq=freq, - g_contour=contour, - g_radar=radar, - g_key=key, - g_eq=eq, - meta=meta, - origin=origin, - site=site, - use_search=use_search, - ) - write_if_different(speaker_content, index_name) - - # write a small file per graph to render the json generated by Vega - for kind in [freq, contour, radar]: - for graph_name in kind: - graph_filename = "{0}/{1}/{2}.html".format(dirname, key, graph_name) - 
logger.info("Writing %s/%s for %s", key, graph_filename, speaker_name) - graph_content = graph_html.render( - speaker=speaker_name, graph=graph_name, meta=meta, site=site + try: + # print('DEBUG: '+speaker_name+' origin='+origin+' version='+key) + generate_measurement( + dataframe, + meta, + site, + use_search, + speaker_name, + origins, + speaker_html, + graph_html, + origin, + measurements, + key, + dfs, + ) + except KeyError as key_error: + print( + "generate_speaker: a file per speaker for {} failed with {}".format( + speaker_name, key_error ) - write_if_different(graph_content, graph_filename) + ) def generate_speakers(mako, dataframe, meta, site, use_search): @@ -156,29 +195,20 @@ def generate_speakers(mako, dataframe, meta, site, use_search): graph_html = mako.get_template("graph.html") for speaker_name, origins in dataframe.items(): logger.debug("html generation for speaker_name=" + speaker_name) - try: - if extradata[speaker_name].get("skip", False): - logger.debug("skipping %s", speaker_name) - continue - generate_speaker( - dataframe, meta, site, use_search, speaker_name, origins, speaker_html, graph_html - ) - except KeyError as key_error: - print("Graph generation failed for {}".format(key_error)) + if speaker_name in extradata and extradata[speaker_name].get("skip", False): + logger.debug("skipping %s", speaker_name) + continue + generate_speaker( + dataframe, meta, site, use_search, speaker_name, origins, speaker_html, graph_html + ) return 0 def main(): # load all metadata from generated json file - pattern = "{}-[0-9]*.json".format(cpaths.CPATH_METADATA_JSON[:-5]) - json_filenames = glob(pattern) - json_filename = None - for json_maybe in json_filenames: - check = re.match(".*[-][0-9]{5}[.]json$", json_maybe) - if check is not None: - json_filename = json_maybe - if not os.path.exists(json_filename): + json_filename = find_metadata_file() + if json_filename is None: logger.error("Cannot find %s", json_filename) sys.exit(1) diff --git 
a/generate_meta.py b/generate_meta.py index 2bf120964..a8909b48b 100755 --- a/generate_meta.py +++ b/generate_meta.py @@ -39,11 +39,13 @@ --dash-ip= IP for the ray dashboard to track execution --dash-port= Port for the ray dashbboard """ +import contextlib from hashlib import md5 from itertools import groupby import json from glob import glob import math +from pathlib import Path import os import sys import time @@ -70,6 +72,7 @@ cache_load, custom_ray_init, sort_metadata_per_date, + find_metadata_file, ) import spinorama.constant_paths as cpaths from spinorama.compute_estimates import estimates @@ -769,51 +772,65 @@ def dict_to_json(filename, d): return # hash changed, remove old files old_hash_pattern = "{}-*.json".format(filename[:-5]) - for fileold in glob(old_hash_pattern): - logger.debug("remove old file %s", fileold) - os.remove(fileold) + for old_filename in glob(old_hash_pattern): + logger.debug("remove old file %s", old_filename) + os.remove(old_filename) with open(hashed_filename, "w", encoding="utf-8") as f: f.write(js) f.close() - with zipfile.ZipFile( - hashed_filename + ".zip", - "w", - compression=zipfile.ZIP_DEFLATED, - allowZip64=True, - ) as current_zip: - current_zip.writestr(hashed_filename, js) + # write the zip file + with zipfile.ZipFile( + hashed_filename + ".zip", + "w", + compression=zipfile.ZIP_DEFLATED, + allowZip64=True, + ) as current_zip: + current_zip.writestr(hashed_filename, js) logger.debug("generated %s and zip version", hashed_filename) + + # add a link to make it easier for other scripts to find the metadata + with contextlib.suppress(OSError): + os.symlink(Path(hashed_filename).name, cpaths.CPATH_METADATA_JSON) meta_full = {k: v for k, v in meta.items() if not v.get("skip", False)} dict_to_json(metafile, meta_full) - # generate a short version for rapid home page charging - # TODO(pierre) - # let's check if it is faster to load slices than the full file - # partitionning is per year, each file is hashed and the hash - # is 
stored in the name. - # Warning: when reading the chunks you need to read them from recent to old and discard the keys you already have seen, - meta_sorted_date = list(sort_metadata_per_date(meta_full).items()) - meta_sorted_date_head = dict(meta_sorted_date[0:10]) - meta_sorted_date_tail = dict(meta_sorted_date[10:]) - - filename = metafile[:-5] + "-head.json" - dict_to_json(filename, meta_sorted_date_head) - - def by_year(key): - m = meta_sorted_date_tail[key] - def_m = m["default_measurement"] - year = int(m["measurements"][def_m].get("review_published", "1970")[0:4]) - # group together years without too many reviews - if year > 1970 and year < 2020: - return 2019 - return year - - grouped_by_year = groupby(meta_sorted_date_tail, by_year) - for year, group in grouped_by_year: - filename = "{}-{:4d}.json".format(metafile[:-5], year) - dict_to_json(filename, {k: meta_sorted_date_tail[k] for k in list(group)}) + +# debugjs = find_metadata_file() +# debugmeta = None +# with open(debugjs, "r") as f: +# debugmeta = json.load(f) +# print('DEBUG: size of full ==> {}'.format(len(meta.keys()))) +# print('DEBUG: size of meta ==> {}'.format(len(meta_full.keys()))) +# print('DEBUG: size of js ==> {}'.format(len(debugmeta.keys()))) + +# # generate a short version for rapid home page charging +# # TODO(pierre) +# # let's check if it is faster to load slices than the full file +# # partitioning is per year, each file is hashed and the hash +# # is stored in the name. 
+# # Warning: when reading the chunks you need to read them from recent to old and discard the keys you already have seen, +# meta_sorted_date = list(sort_metadata_per_date(meta_full).items()) +# meta_sorted_date_head = dict(meta_sorted_date[0:10]) +# meta_sorted_date_tail = dict(meta_sorted_date[10:]) +# +# filename = metafile[:-5] + "-head.json" +# dict_to_json(filename, meta_sorted_date_head) +# +# def by_year(key): +# m = meta_sorted_date_tail[key] +# def_m = m["default_measurement"] +# year = int(m["measurements"][def_m].get("review_published", "1970")[0:4]) +# # group together years without too many reviews +# if year > 1970 and year < 2020: +# return 2019 +# return year +# +# grouped_by_year = groupby(meta_sorted_date_tail, by_year) +# for year, group in grouped_by_year: +# filename = "{}-{:4d}.json".format(metafile[:-5], year) +# dict_to_json(filename, {k: meta_sorted_date_tail[k] for k in list(group)}) def main(): diff --git a/generate_peqs.py b/generate_peqs.py index 5d02c632e..51f0ae80a 100755 --- a/generate_peqs.py +++ b/generate_peqs.py @@ -588,7 +588,7 @@ def main(): if disable_ray: df_all_speakers = cache_load_seq(filters=do_filters, smoke_test=smoke_test) else: - df_all_speakers = cache_load(filters=do_filters, smoke_test=smoke_test, level) + df_all_speakers = cache_load(filters=do_filters, smoke_test=smoke_test, level=level) except ValueError as v_e: if speaker_name is not None: print( diff --git a/generate_radar.py b/generate_radar.py index 6bd471569..b60d4ee49 100755 --- a/generate_radar.py +++ b/generate_radar.py @@ -36,7 +36,7 @@ import plotly.graph_objects as go from spinorama.constant_paths import CPATH_METADATA_JSON, CPATH_DOCS_SPEAKERS -from generate_common import get_custom_logger, args2level +from generate_common import get_custom_logger, args2level, find_metadata_file VERSION = 0.1 @@ -158,9 +158,9 @@ def print_radar(data, scale): def main(): # load all metadata from generated json file - json_filename = CPATH_METADATA_JSON - if not 
os.path.exists(json_filename): - logger.error("Cannot find %s", json_filename) + json_filename = find_metadata_file() + if json_filename is None: + logger.error("Cannot find metadata file, did you ran generate_meta.py ?") sys.exit(1) jsmeta = None diff --git a/generate_stats.py b/generate_stats.py index 1200aeed2..5d771b670 100755 --- a/generate_stats.py +++ b/generate_stats.py @@ -35,7 +35,7 @@ import pandas as pd from spinorama.constant_paths import CPATH_METADATA_JSON -from generate_common import get_custom_logger, args2level +from generate_common import get_custom_logger, args2level, find_metadata_file VERSION = 0.4 @@ -154,9 +154,9 @@ def main(): print_what = args["--print"] # load all metadata from generated json file - json_filename = CPATH_METADATA_JSON - if not os.path.exists(json_filename): - logger.error("Cannot find %s", json_filename) + json_filename = find_metadata_file() + if json_filename is None: + logger.error("Cannot find metadata file, did you ran generate_meta.py ?") sys.exit(1) jsmeta = None diff --git a/src/website/brands.html b/src/website/brands.html index d30144f35..7ef5b09eb 100644 --- a/src/website/brands.html +++ b/src/website/brands.html @@ -1,3 +1,4 @@ + diff --git a/src/website/reviewers.html b/src/website/reviewers.html index fc96681dc..670b1b0be 100644 --- a/src/website/reviewers.html +++ b/src/website/reviewers.html @@ -1,20 +1,17 @@ - - - - + + - - - - - + + + + diff --git a/update_brands.sh b/update_brands.sh index fdd3bf85f..4309dcae4 100755 --- a/update_brands.sh +++ b/update_brands.sh @@ -17,7 +17,7 @@ # along with this program. If not, see . export LOCALE=C -json_pp < docs/assets/metadata.json | \ +json_pp < ./docs/assets/metadata.json | \ grep '"brand" : ' | \ cut -d: -f 2 | \ cut -b 2- | \ diff --git a/update_website.sh b/update_website.sh index 9edc83e4f..778e33710 100755 --- a/update_website.sh +++ b/update_website.sh @@ -19,7 +19,7 @@ echo "Update starts" export PYTHONPATH=src:src/website:src/spinorama:. 
-PYTHON=python3.11 +PYTHON=python3.10 IP="127.0.0.1" case $HOSTNAME in @@ -40,7 +40,7 @@ esac #echo $IP # check meta -command=$(python3.11 ./check_meta.py) +command=$(python3.10 ./check_meta.py) status=$? if [ $status -ne 0 ]; then echo "KO checking metadata ($status)"; @@ -53,7 +53,7 @@ fi ./update_pictures.sh # generate all graphs if some are missing rm -fr /tmp/ray -command=$(python3.11 ./generate_graphs.py --dash-ip="$IP") +command=$(python3.10 ./generate_graphs.py --dash-ip="$IP") status=$? if [ $status -ne 0 ]; then echo "KO after generate graph!" @@ -63,7 +63,7 @@ else fi # recompute metadata for all speakers rm -f docs/assets/metadata.json -command=$(python3.11 ./generate_meta.py --dash-ip="$IP") +command=$(python3.10 ./generate_meta.py --dash-ip="$IP") status=$? if [ $status -ne 0 ]; then echo "KO after generate meta!" @@ -75,7 +75,7 @@ fi ./update_pictures.sh # generate radar # rm -f docs/speakers/*/spider* -command=$(python3.11 ./generate_radar.py) +command=$(python3.10 ./generate_radar.py) status=$? if [ $status -ne 0 ]; then echo "KO after generate radar!" @@ -85,7 +85,7 @@ else fi # generate eq_compare # rm -f docs/speakers/*/eq_compare* -command=$(python3.11 ./generate_eq_compare.py) +command=$(python3.10 ./generate_eq_compare.py) status=$? if [ $status -ne 0 ]; then echo "KO after generate EQ compare!" @@ -95,7 +95,7 @@ else fi # generate status rm -f docs/stats/*.json -command=$(python3.11 ./generate_stats.py) +command=$(python3.10 ./generate_stats.py) status=$? if [ $status -ne 0 ]; then echo "KO after generate statistics!" @@ -106,7 +106,7 @@ fi # generate website ./update_brands.sh ./update_reviewers.sh -command=$(python3.11 ./generate_html.py --dev --sitedev=https://dev.spinorama.org) +command=$(python3.10 ./generate_html.py --dev --sitedev=https://dev.spinorama.org) status=$? if [ $status -ne 0 ]; then echo "KO after generate HTML!"