Merge pull request #857 from okcoker/feature/tag-normalization

feat: Clean up tags
This commit is contained in:
Wes Bos 2020-10-12 15:04:22 -04:00 committed by GitHub
commit d5a9c0b2bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 9 deletions

View file

@ -1,26 +1,40 @@
import people from './src/data.js'; import people from './src/data.js';
import { tags, countries, devices } from './src/util/stats'; import { tags, countries, devices, normalizeTag } from './src/util/stats';
/**
 * Return the distinct values of `arr`, keeping the order in which each
 * value first appears.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} A new array holding each distinct value once.
 */
function unique(arr) {
  // A Set keeps only the first occurrence of each value, in insertion order.
  const distinct = new Set(arr);
  return [...distinct];
}
function sourceNodes({ actions, createNodeId, createContentDigest }) { function sourceNodes({ actions, createNodeId, createContentDigest }) {
const normalizedTagMap = tags().reduce((acc, tag) => {
const normalizedTag = normalizeTag(tag.name);
acc[normalizedTag] = tag.name;
return acc;
}, {});
// Add People to the GraphQL API, we randomize the data on each build so no one gets their feelings hurt // Add People to the GraphQL API, we randomize the data on each build so no one gets their feelings hurt
people people
.sort(() => Math.random() - 0.5) .sort(() => Math.random() - 0.5)
.forEach(p => { .forEach(person => {
// Remove duplicated tags. const normalizedPerson = {
const person = { ...p, tags: [...new Set(p.tags)] }; ...person,
// Clean out people that added basically the same tags twice
tags: unique(
person.tags.map(tag => normalizedTagMap[normalizeTag(tag)] || tag)
),
};
const nodeMeta = { const nodeMeta = {
id: createNodeId(`person-${person.name}`), id: createNodeId(`person-${normalizedPerson.name}`),
parent: null, parent: null,
children: [], children: [],
internal: { internal: {
type: `Person`, type: `Person`,
mediaType: `text/html`, mediaType: `text/html`,
content: JSON.stringify(person), content: JSON.stringify(normalizedPerson),
contentDigest: createContentDigest(person), contentDigest: createContentDigest(normalizedPerson),
}, },
}; };
actions.createNode({ ...person, ...nodeMeta }); actions.createNode({ ...normalizedPerson, ...nodeMeta });
}); });
// Add tags to GraphQL API // Add tags to GraphQL API

View file

@ -14,6 +14,23 @@ function countInstances(acc, tag) {
return acc; return acc;
} }
/**
 * Reduce a tag to a canonical lowercase key so that near-identical
 * spellings can be grouped under one entry.
 *
 * Each rewrite is applied in order to the first case-insensitive match
 * only, and the result is then lowercased.
 *
 * @param {string} tag - Tag text as entered.
 * @returns {string} The normalized, lowercased tag.
 */
export function normalizeTag(tag) {
  // Common misspellings and variants currently seen in the data.
  // Open question carried over from the original author: are these rewrites
  // worth having, or would lowercasing alone be enough?
  const rewrites = [
    [/frontend/i, 'Front End'],
    [/backend/i, 'Back End'],
    [/fullstack/i, 'Full Stack'],
    [/a11y/i, 'Accessibility'],
    [/next.?js/i, 'Next'],
    [/react.?js/i, 'React'],
  ];

  let normalized = tag;
  for (const [pattern, replacement] of rewrites) {
    normalized = normalized.replace(pattern, replacement);
  }
  return normalized.toLowerCase();
}
export function countries() { export function countries() {
const data = people const data = people
.map(person => ({ .map(person => ({
@ -50,7 +67,25 @@ export function tags() {
.filter(([, count]) => count >= 3) .filter(([, count]) => count >= 3)
.map(([name, count]) => ({ name, count })); .map(([name, count]) => ({ name, count }));
return [{ name: 'all', count: people.length }, ...tags]; const lowercaseTagMap = tags.reduce((acc, tag) => {
const normalizedName = normalizeTag(tag.name);
const currentCount = acc[normalizedName] || 0;
acc[normalizedName] = currentCount + tag.count;
return acc;
}, {});
// Merge tags like "JavaScript" and "Javascript" based on the
// count… Even though it's obviously JavaScript!
const normalizedTags = tags.reduce((acc, { name }) => {
const normalizedName = normalizeTag(name);
if (typeof lowercaseTagMap[normalizedName] !== 'undefined') {
acc.push({ name, count: lowercaseTagMap[normalizedName] });
delete lowercaseTagMap[normalizedName];
}
return acc;
}, []);
return [{ name: 'all', count: people.length }, ...normalizedTags];
} }
export function devices() { export function devices() {