package main import ( "bufio" "errors" "fmt" "log" "net/url" "os" "path" "slices" "strings" "sync" "gopkg.in/yaml.v3" ) const rootRegistryPath = "../../../registry" type directoryReadme struct { FilePath string RawText string } type rawContributorProfileFrontmatter struct { DisplayName string `yaml:"display_name"` Bio string `yaml:"bio"` GithubUsername string `yaml:"github"` AvatarUrl *string `yaml:"avatar"` LinkedinURL *string `yaml:"linkedin"` WebsiteURL *string `yaml:"website"` SupportEmail *string `yaml:"support_email"` EmployerGithubUsername *string `yaml:"employer_github"` ContributorStatus *string `yaml:"status"` } type contributorFrontmatterWithFilepath struct { rawContributorProfileFrontmatter FilePath string } type contributorProfileStatus int const ( // Community should always be the first value defined via iota; it should be // treated as the zero value of the type in the event that a more specific // status wasn't defined profileStatusCommunity contributorProfileStatus = iota profileStatusPartner profileStatusOfficial ) type contributorProfile struct { EmployeeGithubUsernames []string GithubUsername string DisplayName string Bio string AvatarUrl string WebsiteURL *string LinkedinURL *string SupportEmail *string Status contributorProfileStatus } var _ error = workflowPhaseError{} type workflowPhaseError struct { Phase string Errors []error } func (wpe workflowPhaseError) Error() string { msg := fmt.Sprintf("Error during %q phase of README validation:", wpe.Phase) for _, e := range wpe.Errors { msg += fmt.Sprintf("\n- %v", e) } msg += "\n" return msg } func extractFrontmatter(readmeText string) (string, error) { if readmeText == "" { return "", errors.New("README is empty") } const fence = "---" fm := "" fenceCount := 0 lineScanner := bufio.NewScanner( strings.NewReader(strings.TrimSpace(readmeText)), ) for lineScanner.Scan() { nextLine := lineScanner.Text() if fenceCount == 0 && nextLine != fence { return "", errors.New("README does not start with frontmatter fence") } if nextLine != fence { fm += nextLine + "\n" continue } fenceCount++ if fenceCount >= 2 { break } } if fenceCount == 1 { return "", errors.New("README does not have two sets of frontmatter fences") } return fm, nil } func validateContributorYaml(yml contributorFrontmatterWithFilepath) []error { // This function needs to aggregate a bunch of different errors, rather than // stopping at the first one found, so using code blocks to section off // logic for different fields errors := []error{} // GitHub Username { if yml.GithubUsername == "" { errors = append( errors, fmt.Errorf( "missing GitHub username for %q", yml.FilePath, ), ) } lower := strings.ToLower(yml.GithubUsername) if uriSafe := url.PathEscape(lower); uriSafe != lower { errors = append( errors, fmt.Errorf( "gitHub username %q (%q) is not a valid URL path segment", yml.GithubUsername, yml.FilePath, ), ) } } // Company GitHub if yml.EmployerGithubUsername != nil { if *yml.EmployerGithubUsername == "" { errors = append( errors, fmt.Errorf( "company_github field is defined but has empty value for %q", yml.FilePath, ), ) } lower := strings.ToLower(*yml.EmployerGithubUsername) if uriSafe := url.PathEscape(lower); uriSafe != lower { errors = append( errors, fmt.Errorf( "gitHub company username %q (%q) is not a valid URL path segment", *yml.EmployerGithubUsername, yml.FilePath, ), ) } if *yml.EmployerGithubUsername == yml.GithubUsername { errors = append( errors, fmt.Errorf( "cannot list own GitHub name (%q) as employer (%q)", yml.GithubUsername, yml.FilePath, ), ) } } // Display name { if yml.DisplayName == "" { errors = append( errors, fmt.Errorf( "%q (%q) is missing display name", yml.GithubUsername, yml.FilePath, ), ) } } // LinkedIn URL if yml.LinkedinURL != nil { if _, err := url.ParseRequestURI(*yml.LinkedinURL); err != nil { errors = append( errors, fmt.Errorf( "linkedIn URL %q (%q) is not valid: %v", *yml.LinkedinURL, yml.FilePath, err, ), ) } } // Email if yml.SupportEmail != nil { // Can't 100% validate that this is correct without actually sending // an email, and especially with some contributors being individual // developers, we don't want to do that on every single run of the CI // pipeline. Best we can do is verify the general structure username, server, ok := strings.Cut(*yml.SupportEmail, "@") if !ok { errors = append( errors, fmt.Errorf( "email address %q (%q) is missing @ symbol", *yml.LinkedinURL, yml.FilePath, ), ) goto website } if username == "" { errors = append( errors, fmt.Errorf( "email address %q (%q) is missing username", *yml.LinkedinURL, yml.FilePath, ), ) } domain, tld, ok := strings.Cut(server, ".") if !ok { errors = append( errors, fmt.Errorf( "email address %q (%q) is missing period for server segment", *yml.LinkedinURL, yml.FilePath, ), ) goto website } if domain == "" { errors = append( errors, fmt.Errorf( "email address %q (%q) is missing domain", *yml.LinkedinURL, yml.FilePath, ), ) } if tld == "" { errors = append( errors, fmt.Errorf( "email address %q (%q) is missing top-level domain", *yml.LinkedinURL, yml.FilePath, ), ) } } // Website website: if yml.WebsiteURL != nil { if _, err := url.ParseRequestURI(*yml.WebsiteURL); err != nil { errors = append( errors, fmt.Errorf( "LinkedIn URL %q (%q) is not valid: %v", *yml.WebsiteURL, yml.FilePath, err, ), ) } } // Contributor status if yml.ContributorStatus != nil { validStatuses := []string{"official", "partner", "community"} if !slices.Contains(validStatuses, *yml.ContributorStatus) { errors = append( errors, fmt.Errorf( "contributor status %q (%q) is not valid", *yml.ContributorStatus, yml.FilePath, ), ) } } // Avatar URL if yml.AvatarUrl != nil { } return errors } func remapContributorProfile( frontmatter contributorFrontmatterWithFilepath, employeeGitHubNames []string, ) contributorProfile { // Function assumes that fields are previously validated and are safe to // copy over verbatim when appropriate remapped := contributorProfile{ DisplayName: frontmatter.DisplayName, GithubUsername: frontmatter.GithubUsername, Bio: frontmatter.Bio, LinkedinURL: frontmatter.LinkedinURL, SupportEmail: frontmatter.SupportEmail, } if frontmatter.AvatarUrl != nil { remapped.AvatarUrl = *frontmatter.AvatarUrl } if frontmatter.ContributorStatus != nil { switch *frontmatter.ContributorStatus { case "partner": remapped.Status = profileStatusPartner case "official": remapped.Status = profileStatusOfficial default: remapped.Status = profileStatusCommunity } } if employeeGitHubNames != nil { remapped.EmployeeGithubUsernames = employeeGitHubNames[:] slices.SortFunc( remapped.EmployeeGithubUsernames, func(name1 string, name2 string) int { return strings.Compare(name1, name2) }, ) } return remapped } func parseContributorFiles(readmeEntries []directoryReadme) ( map[string]contributorProfile, error, ) { frontmatterByGithub := map[string]contributorFrontmatterWithFilepath{} yamlParsingErrors := workflowPhaseError{ Phase: "YAML parsing", } for _, rm := range readmeEntries { fmText, err := extractFrontmatter(rm.RawText) if err != nil { yamlParsingErrors.Errors = append( yamlParsingErrors.Errors, fmt.Errorf("failed to parse %q: %v", rm.FilePath, err), ) continue } yml := rawContributorProfileFrontmatter{} if err := yaml.Unmarshal([]byte(fmText), &yml); err != nil { yamlParsingErrors.Errors = append( yamlParsingErrors.Errors, fmt.Errorf("failed to parse %q: %v", rm.FilePath, err), ) continue } trackable := contributorFrontmatterWithFilepath{ FilePath: rm.FilePath, rawContributorProfileFrontmatter: yml, } if prev, conflict := frontmatterByGithub[trackable.GithubUsername]; conflict { yamlParsingErrors.Errors = append( yamlParsingErrors.Errors, fmt.Errorf( "GitHub name conflict for %q for files %q and %q", trackable.GithubUsername, prev.FilePath, trackable.FilePath, ), ) continue } frontmatterByGithub[trackable.GithubUsername] = trackable } if len(yamlParsingErrors.Errors) != 0 { return nil, yamlParsingErrors } employeeGithubGroups := map[string][]string{} yamlValidationErrors := workflowPhaseError{ Phase: "Raw YAML Validation", } for _, yml := range frontmatterByGithub { errors := validateContributorYaml(yml) if len(errors) > 0 { yamlValidationErrors.Errors = append( yamlValidationErrors.Errors, errors..., ) continue } if yml.EmployerGithubUsername != nil { employeeGithubGroups[*yml.EmployerGithubUsername] = append( employeeGithubGroups[*yml.EmployerGithubUsername], yml.GithubUsername, ) } } if len(yamlValidationErrors.Errors) != 0 { return nil, yamlValidationErrors } contributorError := workflowPhaseError{ Phase: "Contributor struct remapping", } structured := map[string]contributorProfile{} for _, yml := range frontmatterByGithub { group := employeeGithubGroups[yml.GithubUsername] remapped := remapContributorProfile(yml, group) structured[yml.GithubUsername] = remapped } for companyName, group := range employeeGithubGroups { if _, found := structured[companyName]; found { continue } contributorError.Errors = append( contributorError.Errors, fmt.Errorf( "company %q does not exist in %q directory but is referenced by these profiles: [%s]", companyName, rootRegistryPath, strings.Join(group, ", "), ), ) } if len(contributorError.Errors) != 0 { return nil, contributorError } return structured, nil } // backfillAvatarUrls takes a map of contributor information, each keyed by // GitHub username, and tries to mutate each entry to fill in its missing avatar // URL. The first integer indicates the number of avatars that needed to be // backfilled, while the second indicates the number that could be backfilled // without any errors. // // The function will collect all request errors, rather than return the first // one found. func backfillAvatarUrls(contributors map[string]contributorProfile) (int, int, error) { if contributors == nil { return 0, 0, errors.New("provided map is nil") } wg := sync.WaitGroup{} mtx := sync.Mutex{} errors := []error{} successfulBackfills := 0 // Todo: Add actual fetching logic once everything else has been verified requestAvatarUrl := func(string) (string, error) { return "", nil } avatarsThatNeedBackfill := 0 for ghUsername, con := range contributors { if con.AvatarUrl != "" { continue } avatarsThatNeedBackfill++ wg.Add(1) go func() { defer wg.Done() url, err := requestAvatarUrl(ghUsername) mtx.Lock() defer mtx.Unlock() if err != nil { errors = append(errors, err) return } successfulBackfills++ con.AvatarUrl = url contributors[ghUsername] = con }() } wg.Wait() if len(errors) == 0 { return avatarsThatNeedBackfill, successfulBackfills, nil } slices.SortFunc(errors, func(e1 error, e2 error) int { return strings.Compare(e1.Error(), e2.Error()) }) return avatarsThatNeedBackfill, successfulBackfills, workflowPhaseError{ Phase: "Avatar Backfill", Errors: errors, } } func main() { log.Println("Starting README validation") dirEntries, err := os.ReadDir(rootRegistryPath) if err != nil { log.Panic(err) } log.Printf("Identified %d top-level directory entries\n", len(dirEntries)) allReadmeFiles := []directoryReadme{} fsErrors := workflowPhaseError{ Phase: "FileSystem reading", Errors: []error{}, } for _, e := range dirEntries { dirPath := path.Join(rootRegistryPath, e.Name()) if !e.IsDir() { fsErrors.Errors = append( fsErrors.Errors, fmt.Errorf( "Detected non-directory file %q at base of main Registry directory", dirPath, ), ) continue } readmePath := path.Join(dirPath, "README.md") rmBytes, err := os.ReadFile(readmePath) if err != nil { fsErrors.Errors = append(fsErrors.Errors, err) continue } allReadmeFiles = append(allReadmeFiles, directoryReadme{ FilePath: readmePath, RawText: string(rmBytes), }) } if len(fsErrors.Errors) != 0 { log.Panic(fsErrors) } log.Printf("Processing %d README files\n", len(allReadmeFiles)) contributors, err := parseContributorFiles(allReadmeFiles) if err != nil { log.Panic(err) } log.Printf( "Processed %d README files as valid contributor profiles", len(contributors), ) backfillsNeeded, successCount, err := backfillAvatarUrls(contributors) if err != nil { log.Panic(err) } if backfillsNeeded == 0 { log.Println("No GitHub avatar backfills needed") } else { log.Printf( "Backfilled %d/%d missing GitHub avatars", backfillsNeeded, successCount, ) } log.Printf( "Processed all READMEs in the %q directory\n", rootRegistryPath, ) }