2025-04-08 19:32:49 +00:00

603 lines
13 KiB
Go

package main
import (
"bufio"
"errors"
"fmt"
"log"
"net/url"
"os"
"path"
"slices"
"strings"
"gopkg.in/yaml.v3"
)
// rootRegistryPath is the directory that main scans for contributor
// subdirectories. It is a relative path, so it is resolved against the working
// directory the program is run from.
const rootRegistryPath = "../../../registry"
// directoryReadme pairs the raw text of one README file with the path it was
// read from, so that later error messages can name the offending file.
type directoryReadme struct {
// FilePath is the path the README was read from.
FilePath string
// RawText is the full, unparsed contents of the README.
RawText string
}
// rawContributorProfileFrontmatter mirrors the YAML frontmatter of a
// contributor README exactly as it is unmarshaled, before any validation.
// Pointer fields are optional: validateContributorYaml skips a field's checks
// when its pointer is nil.
type rawContributorProfileFrontmatter struct {
DisplayName string `yaml:"display_name"`
Bio string `yaml:"bio"`
// GithubUsername is required and is also the key that profiles are indexed
// by in parseContributorFiles.
GithubUsername string `yaml:"github"`
// AvatarUrl is the only URL field that is allowed to be a relative URL.
AvatarUrl *string `yaml:"avatar"`
LinkedinURL *string `yaml:"linkedin"`
WebsiteURL *string `yaml:"website"`
SupportEmail *string `yaml:"support_email"`
// EmployerGithubUsername names another profile (by its GitHub username) as
// this contributor's employer; parseContributorFiles reports an error when
// no such profile exists.
EmployerGithubUsername *string `yaml:"employer_github"`
// ContributorStatus, when set, must be "official", "partner", or
// "community".
ContributorStatus *string `yaml:"status"`
}
// contributorFrontmatterWithFilepath attaches the source file path to a
// contributor's raw frontmatter so that validation errors can name the README
// they came from.
type contributorFrontmatterWithFilepath struct {
rawContributorProfileFrontmatter
// FilePath is the path of the README the frontmatter was extracted from.
FilePath string
}
// contributorProfileStatus classifies a contributor profile as community,
// partner, or official.
type contributorProfileStatus int

const (
	// profileStatusCommunity must remain the first entry of this iota sequence:
	// it doubles as the zero value of the type, and therefore as the fallback
	// whenever no more specific status was supplied.
	profileStatusCommunity contributorProfileStatus = iota
	profileStatusPartner
	profileStatusOfficial
)

// String returns the lowercase label of the status. Every value other than
// partner or official, including out-of-range values, is reported as
// "community".
func (status contributorProfileStatus) String() string {
	if status == profileStatusOfficial {
		return "official"
	}
	if status == profileStatusPartner {
		return "partner"
	}
	return "community"
}
// contributorProfile is the remapped form of a contributor's frontmatter, as
// produced by remapContributorProfile from already-validated input.
type contributorProfile struct {
// EmployeeGithubUsernames lists, in sorted order, the GitHub usernames of
// the profiles that name this profile as their employer.
EmployeeGithubUsernames []string
GithubUsername string
DisplayName string
Bio string
// The pointer fields below are copied over from the frontmatter as-is and
// are nil when the frontmatter omitted them.
AvatarUrl *string
WebsiteURL *string
LinkedinURL *string
SupportEmail *string
// Status stays at its zero value, profileStatusCommunity, unless the
// frontmatter asked for "partner" or "official".
Status contributorProfileStatus
}
// Compile-time check that workflowPhaseError satisfies the error interface.
var _ error = workflowPhaseError{}

// workflowPhaseError bundles every error collected during one named phase of
// the README validation workflow, so that a phase can report all of its
// problems at once instead of stopping at the first.
type workflowPhaseError struct {
	// Phase is the human-readable name of the phase that failed.
	Phase string
	// Errors holds the individual problems found during the phase.
	Errors []error
}

// Error renders a header line naming the phase, followed by one "- "-prefixed
// line per collected error and a trailing newline.
func (wpe workflowPhaseError) Error() string {
	// A Builder avoids re-copying the whole message for every appended error.
	var msg strings.Builder
	fmt.Fprintf(&msg, "Error during %q phase of README validation:", wpe.Phase)
	for _, e := range wpe.Errors {
		fmt.Fprintf(&msg, "\n- %v", e)
	}
	msg.WriteByte('\n')
	return msg.String()
}
// extractFrontmatter returns the text found between the first two "---" fence
// lines of a README, with each frontmatter line terminated by "\n". Leading
// and trailing whitespace around the README as a whole is ignored, but a fence
// line must be exactly "---".
//
// An error is returned when the README is empty or contains only whitespace,
// when its first line is not a fence, when the closing fence is missing, or
// when the text cannot be scanned line by line.
func extractFrontmatter(readmeText string) (string, error) {
	// Trim before the emptiness check so a README made up solely of whitespace
	// is rejected instead of being reported as having empty frontmatter.
	trimmed := strings.TrimSpace(readmeText)
	if trimmed == "" {
		return "", errors.New("README is empty")
	}

	const fence = "---"
	var fm strings.Builder
	fenceCount := 0

	lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
	for lineScanner.Scan() {
		nextLine := lineScanner.Text()
		if nextLine == fence {
			fenceCount++
			if fenceCount >= 2 {
				break
			}
			continue
		}
		if fenceCount == 0 {
			return "", errors.New("README does not start with frontmatter fence")
		}
		fm.WriteString(nextLine)
		fm.WriteByte('\n')
	}
	// Scan also stops on failure (for example a line longer than the scanner's
	// buffer); surface that rather than treating it as the end of the README.
	if err := lineScanner.Err(); err != nil {
		return "", fmt.Errorf("unable to scan README lines: %w", err)
	}
	if fenceCount < 2 {
		return "", errors.New("README does not have two sets of frontmatter fences")
	}
	return fm.String(), nil
}
// validateContributorYaml checks every field of a contributor's frontmatter
// and returns all of the problems it finds, rather than stopping at the first.
// The returned slice is empty (but non-nil) when nothing is wrong. Each
// message names the offending README via yml.FilePath.
//
// Optional fields are pointers; a nil pointer means the field was omitted and
// its checks are skipped.
func validateContributorYaml(yml contributorFrontmatterWithFilepath) []error {
	problems := []error{}

	// The checks for each field are grouped in their own closure so that a
	// group can end early with a plain return without skipping the groups
	// that follow it.

	// GitHub username
	func() {
		if yml.GithubUsername == "" {
			problems = append(problems, fmt.Errorf(
				"missing GitHub username for %q",
				yml.FilePath,
			))
			return
		}

		lower := strings.ToLower(yml.GithubUsername)
		if uriSafe := url.PathEscape(lower); uriSafe != lower {
			problems = append(problems, fmt.Errorf(
				"gitHub username %q (%q) is not a valid URL path segment",
				yml.GithubUsername,
				yml.FilePath,
			))
		}
	}()

	// Employer GitHub username
	func() {
		if yml.EmployerGithubUsername == nil {
			return
		}
		employer := *yml.EmployerGithubUsername

		if employer == "" {
			// Name the key exactly as it is spelled in the frontmatter so
			// the author can find it.
			problems = append(problems, fmt.Errorf(
				"employer_github field is defined but has empty value for %q",
				yml.FilePath,
			))
			return
		}

		lower := strings.ToLower(employer)
		if uriSafe := url.PathEscape(lower); uriSafe != lower {
			problems = append(problems, fmt.Errorf(
				"gitHub company username %q (%q) is not a valid URL path segment",
				employer,
				yml.FilePath,
			))
		}

		if employer == yml.GithubUsername {
			problems = append(problems, fmt.Errorf(
				"cannot list own GitHub name (%q) as employer (%q)",
				yml.GithubUsername,
				yml.FilePath,
			))
		}
	}()

	// Display name
	if yml.DisplayName == "" {
		problems = append(problems, fmt.Errorf(
			"GitHub user %q (%q) is missing display name",
			yml.GithubUsername,
			yml.FilePath,
		))
	}

	// LinkedIn URL
	func() {
		if yml.LinkedinURL == nil {
			return
		}
		if _, err := url.ParseRequestURI(*yml.LinkedinURL); err != nil {
			problems = append(problems, fmt.Errorf(
				"linkedIn URL %q (%q) is not valid: %v",
				*yml.LinkedinURL,
				yml.FilePath,
				err,
			))
		}
	}()

	// Email
	func() {
		if yml.SupportEmail == nil {
			return
		}
		// Every message below must quote the email itself. Reading any other
		// optional field here would panic whenever that field is omitted.
		email := *yml.SupportEmail

		// Can't 100% validate that this is correct without actually sending
		// an email, and especially with some contributors being individual
		// developers, we don't want to do that on every single run of the CI
		// pipeline. Best we can do is verify the general structure
		username, server, ok := strings.Cut(email, "@")
		if !ok {
			problems = append(problems, fmt.Errorf(
				"email address %q (%q) is missing @ symbol",
				email,
				yml.FilePath,
			))
			return
		}
		if username == "" {
			problems = append(problems, fmt.Errorf(
				"email address %q (%q) is missing username",
				email,
				yml.FilePath,
			))
		}

		domain, tld, ok := strings.Cut(server, ".")
		if !ok {
			problems = append(problems, fmt.Errorf(
				"email address %q (%q) is missing period for server segment",
				email,
				yml.FilePath,
			))
			return
		}
		if domain == "" {
			problems = append(problems, fmt.Errorf(
				"email address %q (%q) is missing domain",
				email,
				yml.FilePath,
			))
		}
		if tld == "" {
			problems = append(problems, fmt.Errorf(
				"email address %q (%q) is missing top-level domain",
				email,
				yml.FilePath,
			))
		}

		if strings.Contains(email, "?") {
			problems = append(problems, fmt.Errorf(
				"email for %q is not allowed to contain search parameters",
				yml.FilePath,
			))
		}
	}()

	// Website
	func() {
		if yml.WebsiteURL == nil {
			return
		}
		if _, err := url.ParseRequestURI(*yml.WebsiteURL); err != nil {
			problems = append(problems, fmt.Errorf(
				"website URL %q (%q) is not valid: %v",
				*yml.WebsiteURL,
				yml.FilePath,
				err,
			))
		}
	}()

	// Contributor status
	func() {
		if yml.ContributorStatus == nil {
			return
		}
		validStatuses := []string{"official", "partner", "community"}
		if !slices.Contains(validStatuses, *yml.ContributorStatus) {
			problems = append(problems, fmt.Errorf(
				"contributor status %q (%q) is not valid",
				*yml.ContributorStatus,
				yml.FilePath,
			))
		}
	}()

	// Avatar URL - can't validate the image actually leads to a valid resource
	// in a pure function, but can at least catch obvious problems
	func() {
		if yml.AvatarUrl == nil {
			return
		}
		avatar := *yml.AvatarUrl

		if avatar == "" {
			problems = append(problems, fmt.Errorf(
				"avatar URL for %q must be omitted or non-empty string",
				yml.FilePath,
			))
			return
		}

		// Have to use .Parse instead of .ParseRequestURI because this is the
		// one field that's allowed to be a relative URL
		if _, err := url.Parse(avatar); err != nil {
			problems = append(problems, fmt.Errorf(
				"avatar URL %q (%q) is not a valid relative or absolute URL: %v",
				avatar,
				yml.FilePath,
				err,
			))
		}

		if strings.Contains(avatar, "?") {
			problems = append(problems, fmt.Errorf(
				"avatar URL for %q is not allowed to contain search parameters",
				yml.FilePath,
			))
		}
	}()

	return problems
}
// remapContributorProfile converts validated frontmatter into a
// contributorProfile.
//
// frontmatter is assumed to have been validated already, so its fields are
// copied over verbatim; a missing avatar URL is left nil here and is expected
// to be backfilled during the main Registry site build step.
// employeeGitHubNames lists the GitHub usernames of the profiles that name
// this one as their employer. It is stored as a sorted copy, so the caller's
// slice is never reordered; a nil slice stays nil.
//
// An unset or unrecognized status maps to profileStatusCommunity.
func remapContributorProfile(
	frontmatter contributorFrontmatterWithFilepath,
	employeeGitHubNames []string,
) contributorProfile {
	remapped := contributorProfile{
		DisplayName:    frontmatter.DisplayName,
		GithubUsername: frontmatter.GithubUsername,
		Bio:            frontmatter.Bio,
		LinkedinURL:    frontmatter.LinkedinURL,
		SupportEmail:   frontmatter.SupportEmail,
		WebsiteURL:     frontmatter.WebsiteURL,
		AvatarUrl:      frontmatter.AvatarUrl,
		// Re-slicing with [:] would share the caller's backing array, so the
		// sort below would reorder the caller's data; Clone detaches it.
		EmployeeGithubUsernames: slices.Clone(employeeGitHubNames),
	}
	slices.Sort(remapped.EmployeeGithubUsernames)

	if frontmatter.ContributorStatus != nil {
		switch *frontmatter.ContributorStatus {
		case "partner":
			remapped.Status = profileStatusPartner
		case "official":
			remapped.Status = profileStatusOfficial
		default:
			remapped.Status = profileStatusCommunity
		}
	}

	return remapped
}
// parseContributorFiles turns a set of contributor READMEs into profiles keyed
// by GitHub username. It runs three phases and stops after the first phase
// that produces any errors, returning them together as a workflowPhaseError:
//
//  1. YAML parsing: extract and unmarshal each README's frontmatter, rejecting
//     READMEs that share a GitHub username with an earlier one.
//  2. Raw YAML validation: run validateContributorYaml on every profile and
//     group contributors by the employer they name.
//  3. Contributor struct remapping: build the final profiles and verify that
//     every named employer has a profile of its own.
//
// Errors within a phase are reported in a stable order: README input order for
// the first two phases, and sorted employer name for the third.
func parseContributorFiles(readmeEntries []directoryReadme) (
	map[string]contributorProfile,
	error,
) {
	frontmatterByGithub := map[string]contributorFrontmatterWithFilepath{}
	// Map iteration order is random, so remember the usernames in the order
	// their READMEs were supplied and walk that list in the later phases.
	githubNames := make([]string, 0, len(readmeEntries))
	yamlParsingErrors := workflowPhaseError{
		Phase: "YAML parsing",
	}
	for _, rm := range readmeEntries {
		fmText, err := extractFrontmatter(rm.RawText)
		if err != nil {
			yamlParsingErrors.Errors = append(
				yamlParsingErrors.Errors,
				fmt.Errorf("failed to parse %q: %w", rm.FilePath, err),
			)
			continue
		}

		yml := rawContributorProfileFrontmatter{}
		if err := yaml.Unmarshal([]byte(fmText), &yml); err != nil {
			yamlParsingErrors.Errors = append(
				yamlParsingErrors.Errors,
				fmt.Errorf("failed to parse %q: %w", rm.FilePath, err),
			)
			continue
		}

		trackable := contributorFrontmatterWithFilepath{
			FilePath:                         rm.FilePath,
			rawContributorProfileFrontmatter: yml,
		}
		if prev, conflict := frontmatterByGithub[trackable.GithubUsername]; conflict {
			yamlParsingErrors.Errors = append(
				yamlParsingErrors.Errors,
				fmt.Errorf(
					"GitHub name conflict for %q for files %q and %q",
					trackable.GithubUsername,
					prev.FilePath,
					trackable.FilePath,
				),
			)
			continue
		}
		frontmatterByGithub[trackable.GithubUsername] = trackable
		githubNames = append(githubNames, trackable.GithubUsername)
	}
	if len(yamlParsingErrors.Errors) != 0 {
		return nil, yamlParsingErrors
	}

	// Employer GitHub username -> usernames of the contributors who name it.
	employeeGithubGroups := map[string][]string{}
	yamlValidationErrors := workflowPhaseError{
		Phase: "Raw YAML Validation",
	}
	for _, name := range githubNames {
		yml := frontmatterByGithub[name]
		// Not named "errors", which would shadow the errors package.
		problems := validateContributorYaml(yml)
		if len(problems) > 0 {
			yamlValidationErrors.Errors = append(
				yamlValidationErrors.Errors,
				problems...,
			)
			continue
		}

		if yml.EmployerGithubUsername != nil {
			employeeGithubGroups[*yml.EmployerGithubUsername] = append(
				employeeGithubGroups[*yml.EmployerGithubUsername],
				yml.GithubUsername,
			)
		}
	}
	if len(yamlValidationErrors.Errors) != 0 {
		return nil, yamlValidationErrors
	}

	contributorError := workflowPhaseError{
		Phase: "Contributor struct remapping",
	}
	structured := map[string]contributorProfile{}
	for _, name := range githubNames {
		yml := frontmatterByGithub[name]
		group := employeeGithubGroups[yml.GithubUsername]
		structured[yml.GithubUsername] = remapContributorProfile(yml, group)
	}

	// Sort the employer names so missing-company errors come out in the same
	// order on every run.
	companyNames := make([]string, 0, len(employeeGithubGroups))
	for companyName := range employeeGithubGroups {
		companyNames = append(companyNames, companyName)
	}
	slices.Sort(companyNames)
	for _, companyName := range companyNames {
		if _, found := structured[companyName]; found {
			continue
		}
		contributorError.Errors = append(
			contributorError.Errors,
			fmt.Errorf(
				"company %q does not exist in %q directory but is referenced by these profiles: [%s]",
				companyName,
				rootRegistryPath,
				strings.Join(employeeGithubGroups[companyName], ", "),
			),
		)
	}
	if len(contributorError.Errors) != 0 {
		return nil, contributorError
	}

	return structured, nil
}
// main validates the contributor READMEs under rootRegistryPath. Every
// top-level entry there must be a directory containing a README.md; all
// filesystem problems are gathered and reported together before the READMEs
// are handed to parseContributorFiles. Any failure ends the run via log.Panic.
func main() {
	log.Println("Starting README validation")

	entries, err := os.ReadDir(rootRegistryPath)
	if err != nil {
		log.Panic(err)
	}
	log.Printf("Identified %d top-level directory entries\n", len(entries))

	readmes := []directoryReadme{}
	readFailures := workflowPhaseError{
		Phase:  "FileSystem reading",
		Errors: []error{},
	}
	for _, entry := range entries {
		entryPath := path.Join(rootRegistryPath, entry.Name())

		// Only directories may live at the base of the registry.
		if !entry.IsDir() {
			notDirErr := fmt.Errorf(
				"Detected non-directory file %q at base of main Registry directory",
				entryPath,
			)
			readFailures.Errors = append(readFailures.Errors, notDirErr)
			continue
		}

		readmePath := path.Join(entryPath, "README.md")
		contents, readErr := os.ReadFile(readmePath)
		if readErr != nil {
			readFailures.Errors = append(readFailures.Errors, readErr)
			continue
		}

		readmes = append(readmes, directoryReadme{
			FilePath: readmePath,
			RawText:  string(contents),
		})
	}
	if len(readFailures.Errors) != 0 {
		log.Panic(readFailures)
	}

	log.Printf("Processing %d README files\n", len(readmes))
	contributors, err := parseContributorFiles(readmes)
	if err != nil {
		log.Panic(err)
	}

	log.Printf(
		"Processed %d README files as valid contributor profiles",
		len(contributors),
	)
	log.Printf(
		"Processed all READMEs in the %q directory\n",
		rootRegistryPath,
	)
}