631 lines
14 KiB
Go

package main
import (
"bufio"
"errors"
"fmt"
"log"
"net/url"
"os"
"path"
"slices"
"strings"
"gopkg.in/yaml.v3"
)
// rootRegistryPath is the relative path of the directory whose immediate
// subdirectories are scanned for contributor README.md files.
const rootRegistryPath = "./registry"
// readme holds the contents of a single README file together with the path it
// was read from.
type readme struct {
// FilePath is the path the file was read from
FilePath string
// RawText is the complete, unparsed text of the file
RawText string
}
// contributorProfileFrontmatter mirrors the YAML frontmatter block at the top
// of a contributor's README; the struct tags name the YAML keys. Pointer
// fields are the ones the validators in this file treat as optional and skip
// when nil.
type contributorProfileFrontmatter struct {
// Validation rejects an empty display name
DisplayName string `yaml:"display_name"`
// Not inspected by any validator in this file
Bio string `yaml:"bio"`
// Validation rejects an empty value; also used as the unique key for a
// profile when the READMEs are parsed
GithubUsername string `yaml:"github"`
// Script assumes that if value is nil, the Registry site build step will
// backfill the value with the user's GitHub avatar URL
AvatarURL *string `yaml:"avatar"`
LinkedinURL *string `yaml:"linkedin"`
WebsiteURL *string `yaml:"website"`
SupportEmail *string `yaml:"support_email"`
// GitHub username of the contributor's employer; parseContributorFiles
// requires it to match the github value of another profile
EmployerGithubUsername *string `yaml:"employer_github"`
// Validation accepts only "official", "partner", or "community"
ContributorStatus *string `yaml:"status"`
}
// contributorFrontmatterWithFilePath pairs parsed frontmatter with the path of
// the README it came from, so that validation errors can name the offending
// file.
type contributorFrontmatterWithFilePath struct {
contributorProfileFrontmatter
// FilePath is the path of the README the frontmatter was extracted from
FilePath string
}
// Compile-time assertion that validationPhaseError satisfies the error
// interface.
var _ error = validationPhaseError{}

// validationPhaseError bundles every problem detected during one named phase
// of README validation so that they can all be reported at once.
type validationPhaseError struct {
	Phase  string
	Errors []error
}

// Error renders a header line naming the phase, then one "- " bullet line per
// collected error, and finishes with a trailing newline.
func (vpe validationPhaseError) Error() string {
	report := fmt.Sprintf("Error during %q phase of README validation:", vpe.Phase)
	for i := 0; i < len(vpe.Errors); i++ {
		report = fmt.Sprintf("%s\n- %v", report, vpe.Errors[i])
	}
	return report + "\n"
}
// extractFrontmatter returns the text found between the opening and closing
// "---" fence lines at the top of a README, with every line terminated by
// "\n". Whitespace surrounding the README as a whole is ignored.
//
// An error is returned when the README is empty or contains only whitespace,
// when its first line is not a fence, when the closing fence is missing, or
// when the text cannot be scanned line by line (for example, a line that
// exceeds bufio.Scanner's token limit).
func extractFrontmatter(readmeText string) (string, error) {
	// Trim before the emptiness check so a whitespace-only README is reported
	// as empty instead of yielding empty frontmatter with a nil error.
	trimmed := strings.TrimSpace(readmeText)
	if trimmed == "" {
		return "", errors.New("README is empty")
	}

	const fence = "---"
	var fm strings.Builder
	fenceCount := 0

	lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
	for lineScanner.Scan() {
		nextLine := lineScanner.Text()
		if nextLine != fence {
			if fenceCount == 0 {
				return "", errors.New("README does not start with frontmatter fence")
			}
			fm.WriteString(nextLine)
			fm.WriteByte('\n')
			continue
		}

		fenceCount++
		if fenceCount >= 2 {
			break
		}
	}

	// A scan failure would otherwise look like a missing closing fence.
	if err := lineScanner.Err(); err != nil {
		return "", fmt.Errorf("scanning README: %w", err)
	}
	if fenceCount < 2 {
		return "", errors.New("README does not have two sets of frontmatter fences")
	}
	return fm.String(), nil
}
// contributorValidationFunc is a validation function for verifying one
// specific aspect of a contributor's frontmatter content. Each function should
// be able to return ALL data violations that apply to the function's area of
// concern, rather than returning the first error found. A nil or empty result
// means no violations were found.
type contributorValidationFunc = func(fm contributorFrontmatterWithFilePath) []error
// validateContributorGithubUsername reports a problem when the profile has no
// GitHub username, or when path-escaping the lowercased username would change
// it. A missing username is reported on its own, without further checks.
func validateContributorGithubUsername(fm contributorFrontmatterWithFilePath) []error {
	name := fm.GithubUsername
	if name == "" {
		return []error{
			fmt.Errorf("missing GitHub username for %q", fm.FilePath),
		}
	}

	normalized := strings.ToLower(name)
	if url.PathEscape(normalized) == normalized {
		return []error{}
	}
	return []error{
		fmt.Errorf(
			"gitHub username %q (%q) is not a valid URL path segment",
			name,
			fm.FilePath,
		),
	}
}
// validateContributorEmployerGithubUsername checks the optional
// employer_github field. It returns nil when the field is absent. Otherwise it
// reports an empty value (and stops there), a value whose lowercased form is
// changed by URL path escaping, and a value equal to the contributor's own
// GitHub username.
func validateContributorEmployerGithubUsername(fm contributorFrontmatterWithFilePath) []error {
	if fm.EmployerGithubUsername == nil {
		return nil
	}
	employer := *fm.EmployerGithubUsername

	problems := []error{}
	if employer == "" {
		// The message names the YAML key contributors actually write
		// (employer_github), so they can find the field to fix.
		problems = append(
			problems,
			fmt.Errorf(
				"employer_github field is defined but has empty value for %q",
				fm.FilePath,
			),
		)
		return problems
	}

	lower := strings.ToLower(employer)
	if uriSafe := url.PathEscape(lower); uriSafe != lower {
		problems = append(
			problems,
			fmt.Errorf(
				"gitHub company username %q (%q) is not a valid URL path segment",
				employer,
				fm.FilePath,
			),
		)
	}

	if employer == fm.GithubUsername {
		problems = append(
			problems,
			fmt.Errorf(
				"cannot list own GitHub name (%q) as employer (%q)",
				fm.GithubUsername,
				fm.FilePath,
			),
		)
	}

	return problems
}
// validateContributorDisplayName reports a problem when the profile's display
// name is empty, and an empty result otherwise.
func validateContributorDisplayName(fm contributorFrontmatterWithFilePath) []error {
	if fm.DisplayName != "" {
		return []error{}
	}
	return []error{
		fmt.Errorf(
			"GitHub user %q (%q) is missing display name",
			fm.GithubUsername,
			fm.FilePath,
		),
	}
}
// validateContributorLinkedinURL checks the optional linkedin field. It
// returns nil when the field is absent, and otherwise reports a problem when
// url.ParseRequestURI rejects the value.
func validateContributorLinkedinURL(fm contributorFrontmatterWithFilePath) []error {
	rawURL := fm.LinkedinURL
	if rawURL == nil {
		return nil
	}

	_, parseErr := url.ParseRequestURI(*rawURL)
	if parseErr == nil {
		return []error{}
	}
	return []error{
		fmt.Errorf(
			"linkedIn URL %q (%q) is not valid: %v",
			*rawURL,
			fm.FilePath,
			parseErr,
		),
	}
}
// validateContributorEmail checks the general structure of the optional
// support_email field: username@domain.tld, with no "?" anywhere. It returns
// nil when the field is absent.
//
// A missing "@" or a missing "." in the server part ends validation early,
// because the remaining checks depend on those separators; every other
// violation is accumulated.
func validateContributorEmail(fm contributorFrontmatterWithFilePath) []error {
	if fm.SupportEmail == nil {
		return nil
	}
	// Every message below quotes the email itself. Reading any other optional
	// field here would report the wrong value and panic when that field is
	// nil.
	email := *fm.SupportEmail

	problems := []error{}

	// Can't 100% validate that this is correct without actually sending
	// an email, and especially with some contributors being individual
	// developers, we don't want to do that on every single run of the CI
	// pipeline. Best we can do is verify the general structure
	username, server, ok := strings.Cut(email, "@")
	if !ok {
		problems = append(
			problems,
			fmt.Errorf(
				"email address %q (%q) is missing @ symbol",
				email,
				fm.FilePath,
			),
		)
		return problems
	}

	if username == "" {
		problems = append(
			problems,
			fmt.Errorf(
				"email address %q (%q) is missing username",
				email,
				fm.FilePath,
			),
		)
	}

	domain, tld, ok := strings.Cut(server, ".")
	if !ok {
		problems = append(
			problems,
			fmt.Errorf(
				"email address %q (%q) is missing period for server segment",
				email,
				fm.FilePath,
			),
		)
		return problems
	}

	if domain == "" {
		problems = append(
			problems,
			fmt.Errorf(
				"email address %q (%q) is missing domain",
				email,
				fm.FilePath,
			),
		)
	}
	if tld == "" {
		problems = append(
			problems,
			fmt.Errorf(
				"email address %q (%q) is missing top-level domain",
				email,
				fm.FilePath,
			),
		)
	}
	if strings.Contains(email, "?") {
		problems = append(
			problems,
			fmt.Errorf(
				"email for %q is not allowed to contain search parameters",
				fm.FilePath,
			),
		)
	}

	return problems
}
// validateContributorWebsite checks the optional website field. It returns nil
// when the field is absent, and otherwise reports a problem when
// url.ParseRequestURI rejects the value.
func validateContributorWebsite(fm contributorFrontmatterWithFilePath) []error {
	if fm.WebsiteURL == nil {
		return nil
	}

	problems := []error{}
	if _, err := url.ParseRequestURI(*fm.WebsiteURL); err != nil {
		// Name the website field in the message so the contributor is not sent
		// looking at their LinkedIn URL instead.
		problems = append(
			problems,
			fmt.Errorf(
				"website URL %q (%q) is not valid: %v",
				*fm.WebsiteURL,
				fm.FilePath,
				err,
			),
		)
	}

	return problems
}
// validateContributorStatus checks the optional status field. It returns nil
// when the field is absent, and otherwise reports a problem unless the value
// is exactly "official", "partner", or "community".
func validateContributorStatus(fm contributorFrontmatterWithFilePath) []error {
	status := fm.ContributorStatus
	if status == nil {
		return nil
	}

	if slices.Contains([]string{"official", "partner", "community"}, *status) {
		return []error{}
	}
	return []error{
		fmt.Errorf(
			"contributor status %q (%q) is not valid",
			*status,
			fm.FilePath,
		),
	}
}
// validateContributorAvatarURL checks the optional avatar field. It returns
// nil when the field is absent. An empty value is reported on its own;
// otherwise the value must parse as a URL, must not contain "?", and must end
// in one of the supported image extensions (matched case-sensitively).
//
// Can't validate the image actually leads to a valid resource in a pure
// function, but can at least catch obvious problems
func validateContributorAvatarURL(fm contributorFrontmatterWithFilePath) []error {
	if fm.AvatarURL == nil {
		return nil
	}
	avatarURL := *fm.AvatarURL

	problems := []error{}
	if avatarURL == "" {
		problems = append(
			problems,
			fmt.Errorf(
				"avatar URL for %q must be omitted or non-empty string",
				fm.FilePath,
			),
		)
		return problems
	}

	// Have to use .Parse instead of .ParseRequestURI because this is the
	// one field that's allowed to be a relative URL
	if _, err := url.Parse(avatarURL); err != nil {
		// Label the quoted value as the avatar URL and keep the parser's
		// explanation so the contributor can see what is wrong with it.
		problems = append(
			problems,
			fmt.Errorf(
				"avatar URL %q (%q) is not a valid relative or absolute URL: %v",
				avatarURL,
				fm.FilePath,
				err,
			),
		)
	}
	if strings.Contains(avatarURL, "?") {
		problems = append(
			problems,
			fmt.Errorf(
				"avatar URL for %q is not allowed to contain search parameters",
				fm.FilePath,
			),
		)
	}

	supportedFileFormats := []string{".png", ".jpeg", ".jpg", ".gif", ".svg"}
	hasSupportedFormat := slices.ContainsFunc(supportedFileFormats, func(ext string) bool {
		return strings.HasSuffix(avatarURL, ext)
	})
	if !hasSupportedFormat {
		problems = append(
			problems,
			fmt.Errorf(
				"avatar URL for %q does not end in a supported file format: [%s]",
				fm.FilePath,
				strings.Join(supportedFileFormats, ", "),
			),
		)
	}

	return problems
}
// validateContributorYaml runs every per-field validator against one profile,
// in a fixed order, and returns all of their findings concatenated. The result
// is an empty, non-nil slice when no validator reports a problem.
func validateContributorYaml(yml contributorFrontmatterWithFilePath) []error {
	collected := []error{}
	for _, check := range [...]contributorValidationFunc{
		validateContributorGithubUsername,
		validateContributorEmployerGithubUsername,
		validateContributorDisplayName,
		validateContributorLinkedinURL,
		validateContributorEmail,
		validateContributorWebsite,
		validateContributorStatus,
		validateContributorAvatarURL,
	} {
		collected = append(collected, check(yml)...)
	}
	return collected
}
// parseContributorFiles turns raw README entries into validated contributor
// frontmatter keyed by GitHub username.
//
// It works in two phases and stops after the first phase that produced
// errors, returning all of that phase's errors in one validationPhaseError:
//
//  1. "YAML parsing": extract and unmarshal each README's frontmatter, and
//     reject a README whose GitHub username was already declared by an
//     earlier entry.
//  2. "Raw YAML Validation": run every field validator on each profile, then
//     check that each employer referenced by a profile that passed validation
//     is itself a key of the result map.
//
// Profiles and employers are visited in sorted order so the reported errors
// come out in the same order on every run. On error the returned map is nil.
func parseContributorFiles(readmeEntries []readme) (
	map[string]contributorFrontmatterWithFilePath,
	error,
) {
	frontmatterByUsername := map[string]contributorFrontmatterWithFilePath{}
	yamlParsingErrors := validationPhaseError{
		Phase: "YAML parsing",
	}
	for _, rm := range readmeEntries {
		fm, err := extractFrontmatter(rm.RawText)
		if err != nil {
			yamlParsingErrors.Errors = append(
				yamlParsingErrors.Errors,
				fmt.Errorf("failed to parse %q: %v", rm.FilePath, err),
			)
			continue
		}

		yml := contributorProfileFrontmatter{}
		if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
			yamlParsingErrors.Errors = append(
				yamlParsingErrors.Errors,
				fmt.Errorf("failed to parse %q: %v", rm.FilePath, err),
			)
			continue
		}

		processed := contributorFrontmatterWithFilePath{
			FilePath:                      rm.FilePath,
			contributorProfileFrontmatter: yml,
		}
		if prev, isConflict := frontmatterByUsername[processed.GithubUsername]; isConflict {
			yamlParsingErrors.Errors = append(
				yamlParsingErrors.Errors,
				fmt.Errorf(
					"GitHub name conflict for %q for files %q and %q",
					processed.GithubUsername,
					prev.FilePath,
					processed.FilePath,
				),
			)
			continue
		}
		frontmatterByUsername[processed.GithubUsername] = processed
	}
	if len(yamlParsingErrors.Errors) != 0 {
		return nil, yamlParsingErrors
	}

	// Go randomizes map iteration order; walking sorted keys keeps the error
	// report (and the member lists built below) stable between runs.
	usernames := make([]string, 0, len(frontmatterByUsername))
	for username := range frontmatterByUsername {
		usernames = append(usernames, username)
	}
	slices.Sort(usernames)

	employeeGithubGroups := map[string][]string{}
	yamlValidationErrors := validationPhaseError{
		Phase: "Raw YAML Validation",
	}
	for _, username := range usernames {
		yml := frontmatterByUsername[username]
		problems := validateContributorYaml(yml)
		if len(problems) > 0 {
			yamlValidationErrors.Errors = append(
				yamlValidationErrors.Errors,
				problems...,
			)
			continue
		}

		if yml.EmployerGithubUsername != nil {
			employer := *yml.EmployerGithubUsername
			employeeGithubGroups[employer] = append(
				employeeGithubGroups[employer],
				yml.GithubUsername,
			)
		}
	}

	companyNames := make([]string, 0, len(employeeGithubGroups))
	for companyName := range employeeGithubGroups {
		companyNames = append(companyNames, companyName)
	}
	slices.Sort(companyNames)

	for _, companyName := range companyNames {
		if _, found := frontmatterByUsername[companyName]; found {
			continue
		}
		yamlValidationErrors.Errors = append(
			yamlValidationErrors.Errors,
			fmt.Errorf(
				"company %q does not exist in %q directory but is referenced by these profiles: [%s]",
				companyName,
				rootRegistryPath,
				strings.Join(employeeGithubGroups[companyName], ", "),
			),
		)
	}
	if len(yamlValidationErrors.Errors) != 0 {
		return nil, yamlValidationErrors
	}

	return frontmatterByUsername, nil
}
// aggregateContributorReadmeFiles reads the README.md file from every
// directory directly inside rootRegistryPath.
//
// A failure to list rootRegistryPath is returned as-is. Non-directory entries
// and unreadable README files are collected and returned together as a
// "FileSystem reading" validationPhaseError, in which case no READMEs are
// returned.
func aggregateContributorReadmeFiles() ([]readme, error) {
	entries, err := os.ReadDir(rootRegistryPath)
	if err != nil {
		return nil, err
	}

	found := []readme{}
	failures := []error{}
	for _, entry := range entries {
		entryPath := path.Join(rootRegistryPath, entry.Name())
		if !entry.IsDir() {
			failures = append(
				failures,
				fmt.Errorf(
					"Detected non-directory file %q at base of main Registry directory",
					entryPath,
				),
			)
			continue
		}

		readmePath := path.Join(entryPath, "README.md")
		content, readErr := os.ReadFile(readmePath)
		if readErr != nil {
			failures = append(failures, readErr)
			continue
		}
		found = append(found, readme{
			FilePath: readmePath,
			RawText:  string(content),
		})
	}

	if len(failures) == 0 {
		return found, nil
	}
	return nil, validationPhaseError{
		Phase:  "FileSystem reading",
		Errors: failures,
	}
}
// validateRelativeUrls verifies that every relative avatar URL (one starting
// with "." or "/") stays inside the contributor's own directory and points at
// a readable file. Avatar values that are nil or do not start with "." or "/"
// are skipped.
//
// Contributors are visited in sorted key order so problems are reported in the
// same order on every run. All problems are returned together as a
// "Relative URL validation" validationPhaseError; nil means none were found.
func validateRelativeUrls(
	contributors map[string]contributorFrontmatterWithFilePath,
) error {
	// This function only validates relative avatar URLs for now, but it can be
	// beefed up to validate more in the future
	keys := make([]string, 0, len(contributors))
	for key := range contributors {
		keys = append(keys, key)
	}
	slices.Sort(keys)

	problems := []error{}
	for _, key := range keys {
		con := contributors[key]
		if con.AvatarURL == nil {
			continue
		}
		avatar := *con.AvatarURL
		if !strings.HasPrefix(avatar, ".") && !strings.HasPrefix(avatar, "/") {
			continue
		}

		// The avatar is appended to the README's directory below, so leading
		// slashes do not anchor it anywhere else. Cleaning the remainder
		// exposes parent-directory hops hidden behind prefixes such as "./"
		// or "/", which a plain ".." prefix test would miss.
		cleaned := path.Clean(strings.TrimLeft(avatar, "/"))
		escapesDir := strings.HasPrefix(avatar, "..") ||
			cleaned == ".." ||
			strings.HasPrefix(cleaned, "../")
		if escapesDir {
			problems = append(
				problems,
				fmt.Errorf(
					"%q: relative avatar URLs cannot be placed outside a user's namespaced directory",
					con.FilePath,
				),
			)
			continue
		}

		absolutePath := strings.TrimSuffix(con.FilePath, "README.md") + avatar
		if _, err := os.ReadFile(absolutePath); err != nil {
			problems = append(
				problems,
				fmt.Errorf(
					"relative avatar path %q for %q does not point to image in file system",
					avatar,
					con.FilePath,
				),
			)
		}
	}

	if len(problems) == 0 {
		return nil
	}
	return validationPhaseError{
		Phase:  "Relative URL validation",
		Errors: problems,
	}
}
// main runs the README validation pipeline: read every contributor README,
// parse and validate the frontmatter, then check relative avatar URLs. The
// first stage that returns an error ends the run via log.Panic.
func main() {
	log.Println("Starting README validation")

	readmeFiles, err := aggregateContributorReadmeFiles()
	if err != nil {
		log.Panic(err)
	}
	log.Printf("Processing %d README files\n", len(readmeFiles))

	profiles, err := parseContributorFiles(readmeFiles)
	if err != nil {
		log.Panic(err)
	}
	log.Printf(
		"Processed %d README files as valid contributor profiles",
		len(profiles),
	)

	if err := validateRelativeUrls(profiles); err != nil {
		log.Panic(err)
	}
	log.Println("All relative URLs for READMEs are valid")

	log.Printf(
		"Processed all READMEs in the %q directory\n",
		rootRegistryPath,
	)
}