From 36d33895b7a425987599165e173583f70c1cc91c Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Fri, 18 Apr 2025 19:27:53 +0000 Subject: [PATCH] refactor: start splitting up files --- cmd/readmevalidation/contributors.go | 84 ++------------------ cmd/readmevalidation/errors.go | 28 +++++++ cmd/readmevalidation/main.go | 2 +- cmd/readmevalidation/readmeFiles.go | 113 +++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 77 deletions(-) create mode 100644 cmd/readmevalidation/errors.go create mode 100644 cmd/readmevalidation/readmeFiles.go diff --git a/cmd/readmevalidation/contributors.go b/cmd/readmevalidation/contributors.go index 02823f26..4dfdfe47 100644 --- a/cmd/readmevalidation/contributors.go +++ b/cmd/readmevalidation/contributors.go @@ -1,7 +1,6 @@ package main import ( - "bufio" "errors" "fmt" "net/url" @@ -13,18 +12,10 @@ import ( "gopkg.in/yaml.v3" ) -const rootRegistryPath = "./registry" - var ( - validContributorStatuses = []string{"official", "partner", "community"} - supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"} + validContributorStatuses = []string{"official", "partner", "community"} ) -type readme struct { - filePath string - rawText string -} - type contributorProfileFrontmatter struct { DisplayName string `yaml:"display_name"` Bio string `yaml:"bio"` @@ -44,61 +35,6 @@ type contributorProfile struct { filePath string } -var _ error = validationPhaseError{} - -type validationPhaseError struct { - phase string - errors []error -} - -func (vpe validationPhaseError) Error() string { - validationStrs := []string{} - for _, e := range vpe.errors { - validationStrs = append(validationStrs, fmt.Sprintf("- %v", e)) - } - slices.Sort(validationStrs) - - msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase) - msg += strings.Join(validationStrs, "\n") - msg += "\n" - - return msg -} - -func extractFrontmatter(readmeText string) (string, error) { - if readmeText == "" { - return "", 
errors.New("README is empty") - } - - const fence = "---" - fm := "" - fenceCount := 0 - lineScanner := bufio.NewScanner( - strings.NewReader(strings.TrimSpace(readmeText)), - ) - for lineScanner.Scan() { - nextLine := lineScanner.Text() - if fenceCount == 0 && nextLine != fence { - return "", errors.New("README does not start with frontmatter fence") - } - - if nextLine != fence { - fm += nextLine + "\n" - continue - } - - fenceCount++ - if fenceCount >= 2 { - break - } - } - - if fenceCount == 1 { - return "", errors.New("README does not have two sets of frontmatter fences") - } - return fm, nil -} - func validateContributorGithubUsername(githubUsername string) error { if githubUsername == "" { return errors.New("missing GitHub username") @@ -260,10 +196,6 @@ func validateContributorAvatarURL(avatarURL *string) []error { return problems } -func addFilePathToError(filePath string, err error) error { - return fmt.Errorf("%q: %v", filePath, err) -} - func validateContributorYaml(yml contributorProfile) []error { allProblems := []error{} @@ -297,7 +229,7 @@ func validateContributorYaml(yml contributorProfile) []error { } func parseContributorProfile(rm readme) (contributorProfile, error) { - fm, err := extractFrontmatter(rm.rawText) + fm, _, err := separateFrontmatter(rm.rawText) if err != nil { return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err) } @@ -331,7 +263,7 @@ func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfil } if len(yamlParsingErrors) != 0 { return nil, validationPhaseError{ - phase: "YAML parsing", + phase: validationPhaseReadmeParsing, errors: yamlParsingErrors, } } @@ -356,11 +288,11 @@ func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfil if _, found := profilesByUsername[companyName]; found { continue } - yamlValidationErrors = append(yamlValidationErrors, fmt.Errorf("company %q does not exist in %q directory but is referenced by these profiles: 
[%s]", companyName, rootRegistryPath, strings.Join(group, ", "))) + yamlValidationErrors = append(yamlValidationErrors, fmt.Errorf("%q: company %q does not exist but is referenced by these profiles: [%s]", rootRegistryPath, companyName, strings.Join(group, ", "))) } if len(yamlValidationErrors) != 0 { return nil, validationPhaseError{ - phase: "Raw YAML Validation", + phase: validationPhaseReadmeParsing, errors: yamlValidationErrors, } } @@ -397,7 +329,7 @@ func aggregateContributorReadmeFiles() ([]readme, error) { if len(problems) != 0 { return nil, validationPhaseError{ - phase: "FileSystem reading", + phase: validationPhaseFileLoad, errors: problems, } } @@ -405,7 +337,7 @@ func aggregateContributorReadmeFiles() ([]readme, error) { return allReadmeFiles, nil } -func validateRelativeUrls( +func validateContributorRelativeUrls( contributors map[string]contributorProfile, ) error { // This function only validates relative avatar URLs for now, but it can be @@ -440,7 +372,7 @@ func validateRelativeUrls( return nil } return validationPhaseError{ - phase: "Relative URL validation", + phase: validationPhaseAssetCrossReference, errors: problems, } } diff --git a/cmd/readmevalidation/errors.go b/cmd/readmevalidation/errors.go new file mode 100644 index 00000000..db13edc5 --- /dev/null +++ b/cmd/readmevalidation/errors.go @@ -0,0 +1,28 @@ +package main + +import "fmt" + +// validationPhaseError represents an error that occurred during a specific +// phase of README validation. It should be used to collect ALL validation +// errors that happened during a specific phase, rather than the first one +// encountered. 
+type validationPhaseError struct { + phase validationPhase + errors []error +} + +var _ error = validationPhaseError{} + +func (vpe validationPhaseError) Error() string { + msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase.String()) + for _, e := range vpe.errors { + msg += fmt.Sprintf("\n- %v", e) + } + msg += "\n" + + return msg +} + +func addFilePathToError(filePath string, err error) error { + return fmt.Errorf("%q: %v", filePath, err) +} diff --git a/cmd/readmevalidation/main.go b/cmd/readmevalidation/main.go index 90913185..bc8209fe 100644 --- a/cmd/readmevalidation/main.go +++ b/cmd/readmevalidation/main.go @@ -26,7 +26,7 @@ func main() { log.Panic(err) } - err = validateRelativeUrls(contributors) + err = validateContributorRelativeUrls(contributors) if err != nil { log.Panic(err) } diff --git a/cmd/readmevalidation/readmeFiles.go b/cmd/readmevalidation/readmeFiles.go new file mode 100644 index 00000000..69ccf9fa --- /dev/null +++ b/cmd/readmevalidation/readmeFiles.go @@ -0,0 +1,113 @@ +package main + +import ( + "bufio" + "errors" + "fmt" + "strings" +) + +const rootRegistryPath = "./registry" + +var supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"} + +// readme represents a single README file within the repo (usually within the +// top-level "/registry" directory). +type readme struct { + filePath string + rawText string +} + +// separateFrontmatter attempts to separate a README file's frontmatter content +// from the main README body, returning both values in that order. It does not +// validate whether the structure of the frontmatter is valid (i.e., that it's +// structured as YAML). 
// separateFrontmatter splits a README's text into its frontmatter (the lines
// between the first two "---" fence lines) and its body (everything after the
// second fence), returning both values in that order.
//
// The input is trimmed of surrounding whitespace before scanning, so the
// opening fence must be the first non-blank line. Each frontmatter line is
// trimmed individually; the body is only trimmed as a whole. The function does
// not check that the frontmatter is well-formed YAML.
//
// An error is returned when the input is empty, when fewer than two fence
// lines are found, when nothing but whitespace sits between the fences, or
// when the underlying line scanner reports a failure.
func separateFrontmatter(readmeText string) (string, string, error) {
	if readmeText == "" {
		return "", "", errors.New("README is empty")
	}

	const fence = "---"
	trimmed := strings.TrimSpace(readmeText)

	lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
	// By default a Scanner gives up on any line longer than
	// bufio.MaxScanTokenSize, which would silently cut the README short.
	// No single line can be longer than the whole input, so use that as the
	// upper bound instead.
	lineScanner.Buffer(nil, len(trimmed)+1)

	var fm, body strings.Builder
	fenceCount := 0
	for lineScanner.Scan() {
		nextLine := lineScanner.Text()
		if fenceCount < 2 && nextLine == fence {
			fenceCount++
			continue
		}
		// Break early if the very first line wasn't a fence, because then we
		// know for certain that the README has problems.
		if fenceCount == 0 {
			break
		}

		// Once both fences have been seen, everything else (including any
		// later "---" lines) belongs to the body and is kept verbatim.
		if fenceCount >= 2 {
			body.WriteString(nextLine)
			body.WriteByte('\n')
			continue
		}

		// Frontmatter lines are trimmed one at a time.
		// NOTE(review): this discards leading indentation, so nested YAML
		// mappings or block scalars would be flattened. Presumably the
		// frontmatter is expected to stay flat - confirm before relying on
		// nested keys.
		fm.WriteString(strings.TrimSpace(nextLine))
		fm.WriteByte('\n')
	}
	if err := lineScanner.Err(); err != nil {
		return "", "", fmt.Errorf("scanning README lines: %w", err)
	}
	if fenceCount < 2 {
		return "", "", errors.New("README does not have two sets of frontmatter fences")
	}
	// Blank lines between the fences still contribute "\n" to fm, so compare
	// against the trimmed value rather than the raw string.
	if strings.TrimSpace(fm.String()) == "" {
		return "", "", errors.New("readme has frontmatter fences but no frontmatter content")
	}

	return fm.String(), strings.TrimSpace(body.String()), nil
}

// validationPhase represents a specific phase during README validation. It is
// expected that each phase is discrete, and errors during one will prevent a
// future phase from starting.
type validationPhase int

const (
	// validationPhaseFileStructureValidation indicates when the entire Registry
	// directory is being verified for having all files be placed in the file
	// system as expected.
	validationPhaseFileStructureValidation validationPhase = iota

	// validationPhaseFileLoad indicates when README files are being read from
	// the file system.
	validationPhaseFileLoad

	// validationPhaseReadmeParsing indicates when a README's frontmatter is
	// being parsed as YAML. This phase does not include YAML validation.
	validationPhaseReadmeParsing

	// validationPhaseReadmeValidation indicates when a README's frontmatter is
	// being validated as proper YAML with expected keys.
	validationPhaseReadmeValidation

	// validationPhaseAssetCrossReference indicates when a README's frontmatter
	// is having all its relative URLs be validated for whether they point to
	// valid resources.
	validationPhaseAssetCrossReference
)

// String returns the human-readable label for the phase. Values outside the
// declared constants yield a message that includes the raw integer.
func (p validationPhase) String() string {
	switch p {
	case validationPhaseFileStructureValidation:
		return "File structure validation"
	case validationPhaseFileLoad:
		return "Filesystem reading"
	case validationPhaseReadmeParsing:
		return "README parsing"
	case validationPhaseReadmeValidation:
		return "README validation"
	case validationPhaseAssetCrossReference:
		return "Cross-referencing relative asset URLs"
	default:
		// %d formats the underlying integer and does not call String again.
		return fmt.Sprintf("Unknown validation phase: %d", p)
	}
}