'use strict';

import { Templater, handleResponse, convertNumber, abbrNumber } from './util.js';

/**
 * Chart.js defaults
 */
const CHART_COLORS = { model1: '#09a3d5', model2: '#066B8C' };
const CHART_FONTS = {
    legend: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"',
    ticks: 'Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace'
};

/**
 * Fetch a URL, pass the response through handleResponse and resolve with the
 * result. The returned promise rejects with an Error if the request itself
 * fails, if handleResponse throws, or if the handled result is not flagged
 * `ok` – so callers' .catch() handlers always run on failure.
 * @param {string} url - The URL to request.
 * @returns {Promise<Object>} The handled response (an object with `ok` set).
 */
const fetchJson = url => fetch(url)
    .then(res => handleResponse(res))
    .then(json => {
        if (!json || !json.ok) throw new Error(`Failed to load ${url}`);
        return json;
    });

/**
 * Formatters for model details.
 * @property {function} author - Format model author with optional link.
 * @property {function} license - Format model license with optional link.
 * @property {function} sources - Format training data sources (list or string).
 * @property {function} pipeline - Format list of pipeline components.
 * @property {function} vectors - Format vector data (entries and dimensions).
 * @property {function} version - Format model version number.
 */
export const formats = {
    // Only wrap in a link if a URL is available, otherwise return plain text.
    author: (author, url) => url
        ? `<a href="${url}" target="_blank" rel="noopener">${author}</a>`
        : author,
    license: (license, url) => url
        ? `<a href="${url}" target="_blank" rel="noopener">${license}</a>`
        : license,
    // Lists are joined with commas, anything else is passed through as-is.
    sources: sources => Array.isArray(sources) ? sources.join(', ') : sources,
    pipeline: pipes => (pipes && pipes.length) ? pipes.map(p => `${p}`).join(', ') : '-',
    vectors: vec => vec
        ? `${abbrNumber(vec.keys)} keys, ${abbrNumber(vec.vectors)} unique vectors (${vec.width} dimensions)`
        : 'n/a',
    version: version => `v${version}`
};

/**
 * Find the latest version of a model in a compatibility table. Walks the
 * table in its own key order and returns the first version listed for the
 * first spaCy version that knows the model.
 * NOTE(review): this presumes the table and each version list are ordered
 * newest first – confirm against compatibility.json.
 * @param {string} model - The model name.
 * @param {Object} compat - Compatibility table, keyed by spaCy version.
 * @returns {string|undefined} The version, or undefined if the model is not
 * listed anywhere in the table.
 */
export const getLatestVersion = (model, compat = {}) => {
    for (const models of Object.values(compat)) {
        if (models[model]) return models[model][0];
    }
    return undefined;
};

export class ModelLoader {
    /**
     * Load model meta from GitHub and update model details on site. Uses the
     * Templater mini template engine to update DOM. Loading starts
     * immediately on construction.
     * @param {string} repo - Path to GitHub repository containing releases.
     * @param {Array} models - List of model IDs, e.g. "en_core_web_sm".
     * @param {Object} licenses - License IDs mapped to URLs.
     * @param {Object} benchmarkKeys - Objects of available keys by type, e.g.
     * 'parser', 'ner', 'speed', mapped to labels.
     */
    constructor(repo, models = [], licenses = {}, benchmarkKeys = {}) {
        this.url = `https://raw.githubusercontent.com/${repo}/master`;
        this.repo = `https://github.com/${repo}`;
        this.modelIds = models;
        this.licenses = licenses;
        this.benchKeys = benchmarkKeys;
        this.init();
    }

    /**
     * Mark all model tables as loading, then fetch the compatibility table
     * and load each model. If the table can't be loaded, every model shows
     * its error state.
     */
    init() {
        this.modelIds.forEach(modelId =>
            new Templater(modelId).get('table').setAttribute('data-loading', ''));
        this.fetch(`${this.url}/compatibility.json`)
            .then(json => this.getModels(json.spacy))
            .catch(err => {
                console.error(err);
                this.modelIds.forEach(modelId => this.showError(modelId));
            });
        // make sure scroll positions for progress bar etc. are recalculated
        window.dispatchEvent(new Event('resize'));
    }

    /**
     * Fetch and handle a JSON resource.
     * @param {string} url - The URL to request.
     * @returns {Promise<Object>} Resolves with the handled response, rejects
     * with an Error on any failure.
     */
    fetch(url) {
        return fetchJson(url);
    }

    /**
     * Fetch and render the meta of each model's latest version. Models that
     * are missing from the table or fail to load show their error state.
     * @param {Object} compat - Compatibility table, keyed by spaCy version.
     */
    getModels(compat) {
        this.compat = compat;
        for (const modelId of this.modelIds) {
            const version = getLatestVersion(modelId, compat);
            if (version) {
                this.fetch(`${this.url}/meta/${modelId}-${version}.json`)
                    .then(json => this.render(json))
                    .catch(err => {
                        console.error(err);
                        this.showError(modelId);
                    });
            }
            else {
                this.showError(modelId);
            }
        }
    }

    /**
     * Show the error element of a model and hide the detail rows that can't
     * be filled without meta data.
     * @param {string} modelId - The model ID, e.g. "en_core_web_sm".
     */
    showError(modelId) {
        const tpl = new Templater(modelId);
        tpl.get('table').removeAttribute('data-loading');
        tpl.get('error').style.display = 'block';
        for (const key of ['sources', 'pipeline', 'vectors', 'author', 'license']) {
            tpl.get(key).parentElement.parentElement.style.display = 'none';
        }
    }

    /**
     * Update model details in tables. Currently quite hacky :(
     * @param {Object} data - The model meta data.
     */
    render(data) {
        const modelId = `${data.lang}_${data.name}`;
        const model = `${modelId}-${data.version}`;
        const tpl = new Templater(modelId);
        tpl.get('error').style.display = 'none';
        this.renderDetails(tpl, data);
        this.renderBenchmarks(tpl, data.accuracy, data.speed);
        this.renderCompat(tpl, modelId);
        tpl.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`);
        tpl.get('table').removeAttribute('data-loading');
    }

    /**
     * Fill in the model's detail fields. Rows for missing vectors or an
     * empty pipeline are hidden.
     * @param {Templater} tpl - The model's templater.
     * @param {Object} data - The model meta data.
     */
    renderDetails(tpl, { version, size, description, notes, author, url, license, sources, vectors, pipeline }) {
        const basics = { version, size, description, notes };
        for (const [key, value] of Object.entries(basics)) {
            if (value) tpl.fill(key, value);
        }
        // third argument of fill() is presumably an "insert as HTML" flag
        if (author) tpl.fill('author', formats.author(author, url), true);
        if (license) tpl.fill('license', formats.license(license, this.licenses[license]), true);
        if (sources) tpl.fill('sources', formats.sources(sources));
        if (vectors) tpl.fill('vectors', formats.vectors(vectors));
        else tpl.get('vectors').parentElement.parentElement.style.display = 'none';
        if (pipeline && pipeline.length) tpl.fill('pipeline', formats.pipeline(pipeline), true);
        else tpl.get('pipeline').parentElement.parentElement.style.display = 'none';
    }

    /**
     * Render the parser, NER and speed benchmark tables.
     * @param {Templater} tpl - The model's templater.
     * @param {Object} accuracy - Accuracy scores, keyed by benchmark key.
     * @param {Object} speed - Speed figures, keyed by benchmark key.
     */
    renderBenchmarks(tpl, accuracy = {}, speed = {}) {
        if (!accuracy && !speed) return;
        // only one of the two may be null (e.g. explicit null in the meta)
        const scores = accuracy || {};
        const speeds = speed || {};
        this.renderTable(tpl, 'parser', scores, val => val.toFixed(2));
        this.renderTable(tpl, 'ner', scores, val => val.toFixed(2));
        this.renderTable(tpl, 'speed', speeds, Math.round);
        tpl.get('benchmarks').style.display = 'block';
    }

    /**
     * Fill and show one benchmark table. Does nothing if no keys are
     * configured for the table or none of them has a value.
     * @param {Templater} tpl - The model's templater.
     * @param {string} id - The table ID, e.g. 'parser', 'ner' or 'speed'.
     * @param {Object} benchmarks - Values, keyed by benchmark key.
     * @param {function} converter - Applied to each value before display.
     */
    renderTable(tpl, id, benchmarks, converter = val => val) {
        const keys = Object.keys(this.benchKeys[id] || {});
        if (!keys.some(key => benchmarks[key])) return;
        for (const key of keys) {
            if (benchmarks[key]) {
                tpl
                    .fill(key, convertNumber(converter(benchmarks[key])))
                    .parentElement.style.display = 'table-row';
            }
        }
        tpl.get(id).style.display = 'block';
    }

    /**
     * Populate the compatibility dropdown with all spaCy versions of the
     * table and show the matching model version on change.
     * @param {Templater} tpl - The model's templater.
     * @param {string} modelId - The model ID, e.g. "en_core_web_sm".
     */
    renderCompat(tpl, modelId) {
        tpl.get('compat-wrapper').style.display = 'table-row';
        // disabled placeholder, so picking any real version fires 'change'
        const header = '<option selected disabled>spaCy version</option>';
        // option values must be the table's keys, as getCompat looks them up
        const options = Object.keys(this.compat)
            .map(v => `<option value="${v}">v${v}</option>`)
            .join('');
        tpl
            .fill('compat', header + options, true)
            .addEventListener('change', ({ target: { value }}) =>
                tpl.fill('compat-versions', this.getCompat(value, modelId), true));
    }

    /**
     * Get the model version that's compatible with a given spaCy version.
     * @param {string} version - The spaCy version (key of the table).
     * @param {string} model - The model ID.
     * @returns {string} Model name and version, or 'not compatible'.
     */
    getCompat(version, model) {
        const res = this.compat[version][model];
        return res ? `${model}-${res[0]}` : 'not compatible';
    }
}

export class ModelComparer {
    /**
     * Compare two model meta files and render chart and comparison table.
     * Loading starts immediately on construction.
     * @param {string} repo - Path to GitHub repository containing releases.
     * @param {Object} licenses - License IDs mapped to URLs.
     * @param {Object} benchmarkKeys - Objects of available keys by type, e.g.
     * 'parser', 'ner', 'speed', mapped to labels.
     * @param {Object} languages - Available languages, ID mapped to name.
     * @param {Object} labels - Display labels, looked up by the type and
     * genre parts of a model name.
     * @param {Object} defaultModels - Models to compare on load, 'model1' and
     * 'model2' mapped to model names.
     */
    constructor(repo, licenses = {}, benchmarkKeys = {}, languages = {}, labels = {}, defaultModels) {
        this.url = `https://raw.githubusercontent.com/${repo}/master`;
        this.repo = `https://github.com/${repo}`;
        this.tpl = new Templater('compare');
        this.benchKeys = benchmarkKeys;
        this.licenses = licenses;
        this.languages = languages;
        this.labels = labels;
        this.models = {};
        this.colors = CHART_COLORS;
        this.fonts = CHART_FONTS;
        this.defaultModels = defaultModels;
        this.tpl.get('result').style.display = 'block';
        this.fetchCompat()
            .then(compat => this.init(compat))
            .catch(err => this.showError(err));
    }

    /**
     * Set up the model dropdowns and the chart, and compare the default
     * models if there are any.
     * @param {Object} compat - Compatibility table, keyed by spaCy version.
     */
    init(compat) {
        this.compat = compat;
        const selectA = this.tpl.get('model1');
        const selectB = this.tpl.get('model2');
        selectA.addEventListener('change', ev => this.onSelect(ev));
        selectB.addEventListener('change', ev => this.onSelect(ev));
        this.chart = new Chart('chart_compare_accuracy', {
            type: 'bar',
            options: {
                responsive: true,
                legend: {
                    position: 'bottom',
                    labels: { fontFamily: this.fonts.legend, fontSize: 13 }
                },
                scales: {
                    yAxes: [{ label: 'Accuracy', ticks: { min: 70, fontFamily: this.fonts.ticks }}],
                    xAxes: [{ barPercentage: 0.75, ticks: { fontFamily: this.fonts.ticks }}]
                }
            }
        });
        if (this.defaultModels) {
            selectA.value = this.defaultModels.model1;
            selectB.value = this.defaultModels.model2;
            this.getModels(this.defaultModels);
        }
    }

    /**
     * Fetch the compatibility table.
     * @returns {Promise<Object>} Resolves with the `spacy` entry of
     * compatibility.json, rejects with an Error on any failure.
     */
    fetchCompat() {
        return fetchJson(`${this.url}/compatibility.json`).then(json => json.spacy);
    }

    /**
     * Fetch the meta of a model's latest version.
     * @param {string} name - The model name.
     * @returns {Promise<Object>} Resolves with the model meta, rejects with
     * an Error if the model is unknown or can't be loaded.
     */
    fetchModel(name) {
        // resolve immediately if model already loaded, e.g. in this.models
        if (this.models[name]) return Promise.resolve(this.models[name]);
        const version = getLatestVersion(name, this.compat);
        // don't request "<name>-undefined.json" for models not in the table
        if (!version) return Promise.reject(new Error(`No compatible version found for model "${name}"`));
        return fetchJson(`${this.url}/meta/${name}-${version}.json`)
            .then(json => this.saveModel(name, json));
    }

    /**
     * "Save" meta to this.models so it only has to be fetched from GitHub once.
     * @param {string} name - The model name.
     * @param {Object} data - The model meta data.
     * @returns {Object} The saved model meta data.
     */
    saveModel(name, data) {
        this.models[name] = data;
        return data;
    }

    /**
     * Log an error, hide the result and show the error element instead.
     * @param {*} err - The error to log.
     */
    showError(err) {
        console.error(err);
        this.tpl.get('result').style.display = 'none';
        this.tpl.get('error').style.display = 'block';
    }

    /**
     * Handle a change of one of the model dropdowns. Compares the models as
     * soon as the other dropdown has a value as well.
     * @param {Event} ev - The change event; its target's ID is expected to
     * be 'model1' or 'model2'.
     */
    onSelect(ev) {
        const modelId = ev.target.value;
        const otherId = (ev.target.id === 'model1') ? 'model2' : 'model1';
        // .value is '' if nothing is selected, so no need to index options
        const otherModel = this.tpl.get(otherId).value;
        if (otherModel !== '') this.getModels({ [ev.target.id]: modelId, [otherId]: otherModel });
    }

    /**
     * Load two models and render the comparison, or show the error element
     * if either of them can't be loaded.
     * @param {Object} models - The names of the models to compare.
     * @param {string} models.model1 - Name of the first model.
     * @param {string} models.model2 - Name of the second model.
     */
    getModels({ model1, model2 }) {
        this.tpl.get('result').setAttribute('data-loading', '');
        // both requests are independent, so don't wait for the first one
        Promise.all([this.fetchModel(model1), this.fetchModel(model2)])
            .then(([data1, data2]) => this.render({ model1: data1, model2: data2 }))
            .catch(err => this.showError(err));
    }

    /**
     * Render two models, and populate the chart and table. Currently quite hacky :(
     * @param {Object} models - The models to render.
     * @param {Object} models.model1 - The first model (via first <select>).
     * @param {Object} models.model2 - The second model (via second <select>).
     */
    render({ model1, model2 }) {
        const accKeys = Object.assign({}, this.benchKeys.parser, this.benchKeys.ner);
        const allKeys = [...Object.keys(model1.accuracy || {}), ...Object.keys(model2.accuracy || {})];
        // only chart the configured keys that at least one model reports
        const metaKeys = Object.keys(accKeys).filter(k => allKeys.includes(k));
        const labels = metaKeys.map(key => accKeys[key]);
        const datasets = [model1, model2]
            .map(({ lang, name, version, accuracy = {} }, i) => ({
                label: `${lang}_${name}-${version}`,
                backgroundColor: this.colors[`model${i + 1}`],
                data: metaKeys.map(key => (accuracy[key] || 0).toFixed(2))
            }));
        this.chart.data = { labels, datasets };
        this.chart.update();
        [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model));
        this.tpl.get('result').removeAttribute('data-loading');
    }

    /**
     * Fill one column of the comparison table.
     * @param {Array} metaKeys - The accuracy keys shown in the chart.
     * @param {number} i - The column (1 or 2), used as template key suffix.
     * @param {Object} data - The model meta data.
     */
    renderTable(metaKeys, i, { lang, name, version, size, description, notes, author, url, license, sources, vectors, pipeline, accuracy = {}, speed = {}}) {
        // extract type and genre from model name
        const [type, genre] = name.split('_');
        this.tpl.fill(`table-head${i}`, `${lang}_${name}`);
        this.tpl.get(`link${i}`).setAttribute('href', `/models/${lang}#${lang}_${name}`);
        this.tpl.fill(`download${i}`, `spacy download ${lang}_${name}\n`);
        this.tpl.fill(`lang${i}`, this.languages[lang] || lang);
        this.tpl.fill(`type${i}`, this.labels[type] || type);
        this.tpl.fill(`genre${i}`, this.labels[genre] || genre);
        this.tpl.fill(`version${i}`, formats.version(version), true);
        this.tpl.fill(`size${i}`, size);
        this.tpl.fill(`desc${i}`, description || 'n/a');
        this.tpl.fill(`pipeline${i}`, formats.pipeline(pipeline), true);
        this.tpl.fill(`vectors${i}`, formats.vectors(vectors));
        this.tpl.fill(`sources${i}`, formats.sources(sources));
        this.tpl.fill(`author${i}`, formats.author(author, url), true);
        this.tpl.fill(`license${i}`, formats.license(license, this.licenses[license]), true);
        // check if model accuracy or speed includes one of the pre-set keys
        const speedKeys = Object.keys(this.benchKeys.speed || {});
        for (const key of [...metaKeys, ...speedKeys]) {
            if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2));
            else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])));
            else this.tpl.fill(`${key}${i}`, 'n/a');
        }
    }
}