registry/cmd/readmevalidation/coderresources.go
Michael Smith fd074a5643
fix: improve logic for existing README validation (#325)
Addresses part of https://github.com/coder/registry/issues/194

## Description

This PR beefs up the validation for the validation logic that we already
had in place. This PR does not include adding validation for templates
(which will be addressed in a second PR).

### Changes made
- Added logic to reject unknown frontmatter fields for modules and
contributor profile README files
- Added logic to handle frontmatter fields that were previously missed
in validation steps (GitHub username for contributors and Operating
Systems for modules)
- Updated a few comments (added some new comments, formatted existing
comments to meet 100-column width)

### Type of Change

- [ ] New module
- [x] Bug fix
- [ ] Feature/enhancement
- [ ] Documentation
- [x] Other

## Testing & Validation

- [x] Tests pass (`bun test`)
- [x] Code formatted (`bun run fmt`)
- [x] Changes tested locally
2025-08-13 11:07:08 -05:00

386 lines
12 KiB
Go

package main
import (
"bufio"
"context"
"errors"
"net/url"
"os"
"path"
"regexp"
"slices"
"strings"
"golang.org/x/xerrors"
"gopkg.in/yaml.v3"
)
var (
supportedResourceTypes = []string{"modules", "templates"}
operatingSystems = []string{"windows", "macos", "linux"}
// TODO: This is a holdover from the validation logic used by the Coder Modules repo. It gives us some assurance, but
// realistically, we probably want to parse any Terraform code snippets, and make some deeper guarantees about how it's
// structured. Just validating whether it *can* be parsed as Terraform would be a big improvement.
terraformVersionRe = regexp.MustCompile(`^\s*\bversion\s+=`)
)
type coderResourceFrontmatter struct {
Description string `yaml:"description"`
IconURL string `yaml:"icon"`
DisplayName *string `yaml:"display_name"`
Verified *bool `yaml:"verified"`
Tags []string `yaml:"tags"`
OperatingSystems []string `yaml:"supported_os"`
}
// A slice version of the struct tags from coderResourceFrontmatter. Might be worth using reflection to generate this
// list at runtime in the future, but this should be okay for now
var supportedCoderResourceStructKeys = []string{
"description", "icon", "display_name", "verified", "tags", "supported_os",
// TODO: This is an old, officially deprecated key from the archived coder/modules repo. We can remove this once we
// make sure that the Registry Server is no longer checking this field.
"maintainer_github",
}
// coderResourceReadme represents a README describing a Terraform resource used
// to help create Coder workspaces. As of 2025-04-15, this encapsulates both
// Coder Modules and Coder Templates.
type coderResourceReadme struct {
resourceType string
filePath string
body string
frontmatter coderResourceFrontmatter
}
func validateSupportedOperatingSystems(systems []string) []error {
var errs []error
for _, s := range systems {
if slices.Contains(operatingSystems, s) {
continue
}
errs = append(errs, xerrors.Errorf("detected unknown operating system %q", s))
}
return errs
}
func validateCoderResourceDisplayName(displayName *string) error {
if displayName != nil && *displayName == "" {
return xerrors.New("if defined, display_name must not be empty string")
}
return nil
}
func validateCoderResourceDescription(description string) error {
if description == "" {
return xerrors.New("frontmatter description cannot be empty")
}
return nil
}
func isPermittedRelativeURL(checkURL string) bool {
// Would normally be skittish about having relative paths like this, but it should be safe because we have
// guarantees about the structure of the repo, and where this logic will run.
return strings.HasPrefix(checkURL, "./") || strings.HasPrefix(checkURL, "/") || strings.HasPrefix(checkURL, "../../../../.icons")
}
func validateCoderResourceIconURL(iconURL string) []error {
if iconURL == "" {
return []error{xerrors.New("icon URL cannot be empty")}
}
errs := []error{}
// If the URL does not have a relative path.
if !strings.HasPrefix(iconURL, ".") && !strings.HasPrefix(iconURL, "/") {
if _, err := url.ParseRequestURI(iconURL); err != nil {
errs = append(errs, xerrors.New("absolute icon URL is not correctly formatted"))
}
if strings.Contains(iconURL, "?") {
errs = append(errs, xerrors.New("icon URLs cannot contain query parameters"))
}
return errs
}
// If the URL has a relative path.
if !isPermittedRelativeURL(iconURL) {
errs = append(errs, xerrors.Errorf("relative icon URL %q must either be scoped to that module's directory, or the top-level /.icons directory (this can usually be done by starting the path with \"../../../.icons\")", iconURL))
}
return errs
}
func validateCoderResourceTags(tags []string) error {
if tags == nil {
return xerrors.New("provided tags array is nil")
}
if len(tags) == 0 {
return nil
}
// All of these tags are used for the module/template filter controls in the Registry site. Need to make sure they
// can all be placed in the browser URL without issue.
invalidTags := []string{}
for _, t := range tags {
if t != url.QueryEscape(t) {
invalidTags = append(invalidTags, t)
}
}
if len(invalidTags) != 0 {
return xerrors.Errorf("found invalid tags (tags that cannot be used for filter state in the Registry website): [%s]", strings.Join(invalidTags, ", "))
}
return nil
}
func validateCoderResourceReadmeBody(body string) []error {
var errs []error
trimmed := strings.TrimSpace(body)
// TODO: this may cause unexpected behavior since the errors slice may have a 0 length. Add a test.
errs = append(errs, validateReadmeBody(trimmed)...)
foundParagraph := false
terraformCodeBlockCount := 0
foundTerraformVersionRef := false
lineNum := 0
isInsideCodeBlock := false
isInsideTerraform := false
lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
for lineScanner.Scan() {
lineNum++
nextLine := lineScanner.Text()
// Code assumes that invalid headers would've already been handled by the base validation function, so we don't
// need to check deeper if the first line isn't an h1.
if lineNum == 1 {
if !strings.HasPrefix(nextLine, "# ") {
break
}
continue
}
if strings.HasPrefix(nextLine, "```") {
isInsideCodeBlock = !isInsideCodeBlock
isInsideTerraform = isInsideCodeBlock && strings.HasPrefix(nextLine, "```tf")
if isInsideTerraform {
terraformCodeBlockCount++
}
if strings.HasPrefix(nextLine, "```hcl") {
errs = append(errs, xerrors.New("all .hcl language references must be converted to .tf"))
}
continue
}
if isInsideCodeBlock {
if isInsideTerraform {
foundTerraformVersionRef = foundTerraformVersionRef || terraformVersionRe.MatchString(nextLine)
}
continue
}
// Code assumes that we can treat this case as the end of the "h1 section" and don't need to process any further lines.
if lineNum > 1 && strings.HasPrefix(nextLine, "#") {
break
}
// Code assumes that if we've reached this point, the only other options are:
// (1) empty spaces, (2) paragraphs, (3) HTML, and (4) asset references made via [] syntax.
trimmedLine := strings.TrimSpace(nextLine)
isParagraph := trimmedLine != "" && !strings.HasPrefix(trimmedLine, "![") && !strings.HasPrefix(trimmedLine, "<")
foundParagraph = foundParagraph || isParagraph
}
if terraformCodeBlockCount == 0 {
errs = append(errs, xerrors.New("did not find Terraform code block within h1 section"))
} else {
if terraformCodeBlockCount > 1 {
errs = append(errs, xerrors.New("cannot have more than one Terraform code block in h1 section"))
}
if !foundTerraformVersionRef {
errs = append(errs, xerrors.New("did not find Terraform code block that specifies 'version' field"))
}
}
if !foundParagraph {
errs = append(errs, xerrors.New("did not find paragraph within h1 section"))
}
if isInsideCodeBlock {
errs = append(errs, xerrors.New("code blocks inside h1 section do not all terminate before end of file"))
}
return errs
}
func validateCoderResourceReadme(rm coderResourceReadme) []error {
var errs []error
for _, err := range validateCoderResourceReadmeBody(rm.body) {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
if err := validateCoderResourceDisplayName(rm.frontmatter.DisplayName); err != nil {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
if err := validateCoderResourceDescription(rm.frontmatter.Description); err != nil {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
if err := validateCoderResourceTags(rm.frontmatter.Tags); err != nil {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
for _, err := range validateCoderResourceIconURL(rm.frontmatter.IconURL) {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
for _, err := range validateSupportedOperatingSystems(rm.frontmatter.OperatingSystems) {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
return errs
}
func parseCoderResourceReadme(resourceType string, rm readme) (coderResourceReadme, []error) {
fm, body, err := separateFrontmatter(rm.rawText)
if err != nil {
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)}
}
keyErrs := validateFrontmatterYamlKeys(fm, supportedCoderResourceStructKeys)
if len(keyErrs) != 0 {
remapped := []error{}
for _, e := range keyErrs {
remapped = append(remapped, addFilePathToError(rm.filePath, e))
}
return coderResourceReadme{}, remapped
}
yml := coderResourceFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)}
}
return coderResourceReadme{
resourceType: resourceType,
filePath: rm.filePath,
body: body,
frontmatter: yml,
}, nil
}
func parseCoderResourceReadmeFiles(resourceType string, rms []readme) (map[string]coderResourceReadme, error) {
resources := map[string]coderResourceReadme{}
var yamlParsingErrs []error
for _, rm := range rms {
p, errs := parseCoderResourceReadme(resourceType, rm)
if len(errs) != 0 {
yamlParsingErrs = append(yamlParsingErrs, errs...)
continue
}
resources[p.filePath] = p
}
if len(yamlParsingErrs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadme,
errors: yamlParsingErrs,
}
}
yamlValidationErrors := []error{}
for _, readme := range resources {
errs := validateCoderResourceReadme(readme)
if len(errs) > 0 {
yamlValidationErrors = append(yamlValidationErrors, errs...)
}
}
if len(yamlValidationErrors) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadme,
errors: yamlValidationErrors,
}
}
return resources, nil
}
// Todo: Need to beef up this function by grabbing each image/video URL from
// the body's AST.
func validateCoderResourceRelativeURLs(_ map[string]coderResourceReadme) error {
return nil
}
func aggregateCoderResourceReadmeFiles(resourceType string) ([]readme, error) {
registryFiles, err := os.ReadDir(rootRegistryPath)
if err != nil {
return nil, err
}
var allReadmeFiles []readme
var errs []error
for _, rf := range registryFiles {
if !rf.IsDir() {
continue
}
resourceRootPath := path.Join(rootRegistryPath, rf.Name(), resourceType)
resourceDirs, err := os.ReadDir(resourceRootPath)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
errs = append(errs, err)
}
continue
}
for _, rd := range resourceDirs {
if !rd.IsDir() || rd.Name() == ".coder" {
continue
}
resourceReadmePath := path.Join(resourceRootPath, rd.Name(), "README.md")
rm, err := os.ReadFile(resourceReadmePath)
if err != nil {
errs = append(errs, err)
continue
}
allReadmeFiles = append(allReadmeFiles, readme{
filePath: resourceReadmePath,
rawText: string(rm),
})
}
}
if len(errs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseFile,
errors: errs,
}
}
return allReadmeFiles, nil
}
func validateAllCoderResourceFilesOfType(resourceType string) error {
if !slices.Contains(supportedResourceTypes, resourceType) {
return xerrors.Errorf("resource type %q is not part of supported list [%s]", resourceType, strings.Join(supportedResourceTypes, ", "))
}
allReadmeFiles, err := aggregateCoderResourceReadmeFiles(resourceType)
if err != nil {
return err
}
logger.Info(context.Background(), "processing README files", "num_files", len(allReadmeFiles))
resources, err := parseCoderResourceReadmeFiles(resourceType, allReadmeFiles)
if err != nil {
return err
}
logger.Info(context.Background(), "processed README files as valid Coder resources", "num_files", len(resources), "type", resourceType)
if err := validateCoderResourceRelativeURLs(resources); err != nil {
return err
}
logger.Info(context.Background(), "all relative URLs for READMEs are valid", "type", resourceType)
return nil
}