Merge pull request #857 from okcoker/feature/tag-normalization

feat: Clean up tags
This commit is contained in:
Wes Bos 2020-10-12 15:04:22 -04:00 committed by GitHub
commit d5a9c0b2bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 9 deletions

View file

@ -1,26 +1,40 @@
import people from './src/data.js';
import { tags, countries, devices } from './src/util/stats';
import { tags, countries, devices, normalizeTag } from './src/util/stats';
/**
 * Return a new array holding each distinct value of `arr` once,
 * in order of first appearance.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} A fresh array without repeated values.
 */
function unique(arr) {
  const seen = new Set(arr);
  return [...seen];
}
function sourceNodes({ actions, createNodeId, createContentDigest }) {
const normalizedTagMap = tags().reduce((acc, tag) => {
const normalizedTag = normalizeTag(tag.name);
acc[normalizedTag] = tag.name;
return acc;
}, {});
// Add People to the GraphQL API, we randomize the data on each build so no one gets their feelings hurt
people
.sort(() => Math.random() - 0.5)
.forEach(p => {
// Remove duplicated tags.
const person = { ...p, tags: [...new Set(p.tags)] };
.forEach(person => {
const normalizedPerson = {
...person,
// Clean out people that added basically the same tags twice
tags: unique(
person.tags.map(tag => normalizedTagMap[normalizeTag(tag)] || tag)
),
};
const nodeMeta = {
id: createNodeId(`person-${person.name}`),
id: createNodeId(`person-${normalizedPerson.name}`),
parent: null,
children: [],
internal: {
type: `Person`,
mediaType: `text/html`,
content: JSON.stringify(person),
contentDigest: createContentDigest(person),
content: JSON.stringify(normalizedPerson),
contentDigest: createContentDigest(normalizedPerson),
},
};
actions.createNode({ ...person, ...nodeMeta });
actions.createNode({ ...normalizedPerson, ...nodeMeta });
});
// Add tags to GraphQL API

View file

@ -14,6 +14,23 @@ function countInstances(acc, tag) {
return acc;
}
/**
 * Reduce a tag to a canonical lowercase key so that near-duplicate
 * spellings of the same tag can be matched against each other.
 *
 * @param {string} tag - Raw tag text.
 * @returns {string} The tag with known variants rewritten, then lowercased.
 */
export function normalizeTag(tag) {
  // Common misspellings/variants currently seen in the data.
  // Do we want to go this far, or is lowercasing enough?
  // Each pattern is applied in this order and rewrites only its first match.
  const rewrites = [
    [/frontend/i, 'Front End'],
    [/backend/i, 'Back End'],
    [/fullstack/i, 'Full Stack'],
    [/a11y/i, 'Accessibility'],
    [/next.?js/i, 'Next'],
    [/react.?js/i, 'React'],
  ];

  let result = tag;
  for (const [pattern, replacement] of rewrites) {
    result = result.replace(pattern, replacement);
  }
  return result.toLowerCase();
}
export function countries() {
const data = people
.map(person => ({
@ -50,7 +67,25 @@ export function tags() {
.filter(([, count]) => count >= 3)
.map(([name, count]) => ({ name, count }));
return [{ name: 'all', count: people.length }, ...tags];
const lowercaseTagMap = tags.reduce((acc, tag) => {
const normalizedName = normalizeTag(tag.name);
const currentCount = acc[normalizedName] || 0;
acc[normalizedName] = currentCount + tag.count;
return acc;
}, {});
// Merge tags like "JavaScript" and "Javascript" based on the
// count… Even though it's obviously JavaScript!
const normalizedTags = tags.reduce((acc, { name }) => {
const normalizedName = normalizeTag(name);
if (typeof lowercaseTagMap[normalizedName] !== 'undefined') {
acc.push({ name, count: lowercaseTagMap[normalizedName] });
delete lowercaseTagMap[normalizedName];
}
return acc;
}, []);
return [{ name: 'all', count: people.length }, ...normalizedTags];
}
export function devices() {