From 6e5d96087127024528f3f3f4481de378682a0c89 Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Tue, 15 Apr 2025 15:06:16 +0000 Subject: [PATCH] refactor: split README logic into separate file --- cmd/readmevalidation/contributors.go | 49 +---------- cmd/readmevalidation/readmes.go | 127 +++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 47 deletions(-) create mode 100644 cmd/readmevalidation/readmes.go diff --git a/cmd/readmevalidation/contributors.go b/cmd/readmevalidation/contributors.go index 0d081bb6..fbcc338a 100644 --- a/cmd/readmevalidation/contributors.go +++ b/cmd/readmevalidation/contributors.go @@ -1,7 +1,6 @@ package main import ( - "bufio" "errors" "fmt" "net/url" @@ -13,17 +12,7 @@ import ( "gopkg.in/yaml.v3" ) -const rootRegistryPath = "./registry" - -var ( - validContributorStatuses = []string{"official", "partner", "community"} - supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"} -) - -type readme struct { - filePath string - rawText string -} +var validContributorStatuses = []string{"official", "partner", "community"} type contributorProfileFrontmatter struct { DisplayName string `yaml:"display_name"` @@ -65,40 +54,6 @@ func (vpe validationPhaseError) Error() string { return msg } -func extractFrontmatter(readmeText string) (string, error) { - if readmeText == "" { - return "", errors.New("README is empty") - } - - const fence = "---" - fm := "" - fenceCount := 0 - lineScanner := bufio.NewScanner( - strings.NewReader(strings.TrimSpace(readmeText)), - ) - for lineScanner.Scan() { - nextLine := lineScanner.Text() - if fenceCount == 0 && nextLine != fence { - return "", errors.New("README does not start with frontmatter fence") - } - - if nextLine != fence { - fm += nextLine + "\n" - continue - } - - fenceCount++ - if fenceCount >= 2 { - break - } - } - - if fenceCount == 1 { - return "", errors.New("README does not have two sets of frontmatter fences") - } - return fm, nil -} - func validateContributorGithubUsername(githubUsername string) error { if githubUsername == "" { return errors.New("missing GitHub username") @@ -297,7 +252,7 @@ func validateContributorYaml(yml contributorProfile) []error { } func parseContributorProfile(rm readme) (contributorProfile, error) { - fm, err := extractFrontmatter(rm.rawText) + fm, _, err := separateFrontmatter(rm.rawText) if err != nil { return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err) } diff --git a/cmd/readmevalidation/readmes.go b/cmd/readmevalidation/readmes.go new file mode 100644 index 00000000..e6aff86e --- /dev/null +++ b/cmd/readmevalidation/readmes.go @@ -0,0 +1,127 @@ +package main + +import ( + "bufio" + "errors" + "fmt" + "strings" +) + +const rootRegistryPath = "./registry" + +var supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"} + +// Readme represents a single README file within the repo (usually within the +// "/registry" directory). +type readme struct { + filePath string + rawText string +} + +// separateFrontmatter attempts to separate a README file's frontmatter content +// from the main README body, returning both values in that order. It does not +// validate whether the structure of the frontmatter is valid (i.e., that it's +// structured as YAML). +func separateFrontmatter(readmeText string) (string, string, error) { + if readmeText == "" { + return "", "", errors.New("README is empty") + } + + const fence = "---" + fm := "" + body := "" + fenceCount := 0 + lineScanner := bufio.NewScanner( + strings.NewReader(strings.TrimSpace(readmeText)), + ) + for lineScanner.Scan() { + nextLine := lineScanner.Text() + if fenceCount < 2 && nextLine == fence { + fenceCount++ + continue + } + // Break early if the very first line wasn't a fence, because then we + // know for certain that the README has problems + if fenceCount == 0 { + break + } + + // It should be safe to trim each line of the frontmatter on a per-line + // basis, because there shouldn't be any extra meaning attached to the + // indentation. The same does NOT apply to the README; best we can do is + // gather all the lines, and then trim around it + if inReadmeBody := fenceCount >= 2; inReadmeBody { + body += nextLine + "\n" + } else { + fm += strings.TrimSpace(nextLine) + "\n" + } + } + if fenceCount < 2 { + return "", "", errors.New("README does not have two sets of frontmatter fences") + } + if fm == "" { + return "", "", errors.New("readme has frontmatter fences but no frontmatter content") + } + + return fm, strings.TrimSpace(body), nil +} + +// validationPhase represents a specific phase during README validation. It is +// expected that each phase is discrete, and errors during one will prevent a +// future phase from starting. +type validationPhase int + +const ( + // validationPhaseFilesystemRead indicates when a README file is being read + // from the file system + validationPhaseFilesystemRead validationPhase = iota + + // validationPhaseReadmeParsing indicates when a README's frontmatter is being + // parsed as YAML. This phase does not include YAML validation. + validationPhaseReadmeParsing + + // validationPhaseReadmeValidation indicates when a README's frontmatter is + // being validated as proper YAML with expected keys. + validationPhaseReadmeValidation + + // validationPhaseAssetCrossReference indicates when a README's frontmatter + // is having all its relative URLs be validated for whether they point to + // valid resources. + validationPhaseAssetCrossReference +) + +func (p validationPhase) String() string { + switch p { + case validationPhaseFilesystemRead: + return "Filesystem reading" + case validationPhaseReadmeParsing: + return "README parsing" + case validationPhaseReadmeValidation: + return "README validation" + case validationPhaseAssetCrossReference: + return "Cross-referencing asset references" + default: + return "Unknown validation phase" + } +} + +var _ error = ValidationPhaseError{} + +// ValidationPhaseError represents an error that occurred during a specific +// phase of README validation. It should be used to collect ALL validation +// errors that happened during a specific phase, rather than the first one +// encountered. +type ValidationPhaseError struct { + phase validationPhase + errors []error +} + +func (vpe ValidationPhaseError) Error() string { + msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase.String()) + for _, e := range vpe.errors { + msg += fmt.Sprintf("\n- %v", e) + } + msg += "\n" + + return msg +}