registry/cmd/readmevalidation/coderresources.go
35C4n0r e8e3a4e642
feat(cmd/tests): update tests to fail on invalid and absolute urls (#639)
## Description
Updated the tests:
- We now don't accept absolute urls (http / https)
- We only accept urls that start with `../../../../.icons/*` and exists

<!-- Briefly describe what this PR does and why -->

## Type of Change

- [ ] New module
- [ ] New template
- [x] Bug fix
- [ ] Feature/enhancement
- [ ] Documentation
- [ ] Other

## Testing & Validation

- [x] Tests pass (`bun test`)
- [x] Code formatted (`bun fmt`)
- [x] Changes tested locally

---------

Co-authored-by: DevCats <christofer@coder.com>
2026-01-08 08:13:16 -06:00

366 lines
11 KiB
Go

package main
import (
"bufio"
"errors"
"net/url"
"os"
"path"
"regexp"
"slices"
"strings"
"golang.org/x/xerrors"
"gopkg.in/yaml.v3"
)
var (
supportedResourceTypes = []string{"modules", "templates"}
operatingSystems = []string{"windows", "macos", "linux"}
gfmAlertTypes = []string{"NOTE", "IMPORTANT", "CAUTION", "WARNING", "TIP"}
// TODO: This is a holdover from the validation logic used by the Coder Modules repo. It gives us some assurance, but
// realistically, we probably want to parse any Terraform code snippets, and make some deeper guarantees about how it's
// structured. Just validating whether it *can* be parsed as Terraform would be a big improvement.
terraformVersionRe = regexp.MustCompile(`^\s*\bversion\s+=`)
// Matches the format "> [!INFO]". Deliberately using a broad pattern to catch formatting issues that can mess up
// the renderer for the Registry website
gfmAlertRegex = regexp.MustCompile(`^>(\s*)\[!(\w+)\](\s*)(.*)`)
)
type coderResourceFrontmatter struct {
Description string `yaml:"description"`
IconURL string `yaml:"icon"`
DisplayName *string `yaml:"display_name"`
Verified *bool `yaml:"verified"`
Tags []string `yaml:"tags"`
OperatingSystems []string `yaml:"supported_os"`
}
// A slice version of the struct tags from coderResourceFrontmatter. Might be worth using reflection to generate this
// list at runtime in the future, but this should be okay for now
var supportedCoderResourceStructKeys = []string{
"description", "icon", "display_name", "verified", "tags", "supported_os",
// TODO: This is an old, officially deprecated key from the archived coder/modules repo. We can remove this once we
// make sure that the Registry Server is no longer checking this field.
"maintainer_github",
}
// coderResourceReadme represents a README describing a Terraform resource used
// to help create Coder workspaces. As of 2025-04-15, this encapsulates both
// Coder Modules and Coder Templates.
type coderResourceReadme struct {
resourceType string
filePath string
body string
frontmatter coderResourceFrontmatter
}
func validateSupportedOperatingSystems(systems []string) []error {
var errs []error
for _, s := range systems {
if slices.Contains(operatingSystems, s) {
continue
}
errs = append(errs, xerrors.Errorf("detected unknown operating system %q", s))
}
return errs
}
func validateCoderResourceDisplayName(displayName *string) error {
if displayName != nil && *displayName == "" {
return xerrors.New("if defined, display_name must not be empty string")
}
return nil
}
func validateCoderResourceDescription(description string) error {
if description == "" {
return xerrors.New("frontmatter description cannot be empty")
}
return nil
}
func isPermittedRelativeURL(checkURL string, readmeFilePath string) error {
// Icon URLs must reference the top-level .icons directory
expectedPrefix := "../../../../.icons/"
if !strings.HasPrefix(checkURL, expectedPrefix) {
return xerrors.Errorf("icon URL %q must reference the top-level .icons directory using %q", checkURL, expectedPrefix)
}
// Resolve the path relative to the README file and check if it exists
readmeDir := path.Dir(readmeFilePath)
resolvedPath := path.Join(readmeDir, checkURL)
if _, err := os.Stat(resolvedPath); err != nil {
if os.IsNotExist(err) {
return xerrors.Errorf("icon file does not exist at resolved path %q (referenced as %q)", resolvedPath, checkURL)
}
return xerrors.Errorf("error checking icon file at %q: %v", resolvedPath, err)
}
return nil
}
func validateCoderResourceIconURL(iconURL string, filePath string) []error {
if iconURL == "" {
return []error{xerrors.New("icon URL cannot be empty")}
}
var errs []error
// Reject absolute HTTP/HTTPS URLs - all icons must be local to the repository
if strings.HasPrefix(iconURL, "http://") || strings.HasPrefix(iconURL, "https://") {
errs = append(errs, xerrors.Errorf("icon URL must reference the top-level .icons directory, not an absolute URL %q", iconURL))
return errs
}
// Validate that the icon references ../../../../.icons/ and exists
if err := isPermittedRelativeURL(iconURL, filePath); err != nil {
errs = append(errs, err)
}
return errs
}
func validateCoderResourceTags(tags []string) error {
if tags == nil {
return xerrors.New("provided tags array is nil")
}
if len(tags) == 0 {
return nil
}
// All of these tags are used for the module/template filter controls in the Registry site. Need to make sure they
// can all be placed in the browser URL without issue.
var invalidTags []string
for _, t := range tags {
if t != url.QueryEscape(t) {
invalidTags = append(invalidTags, t)
}
}
if len(invalidTags) != 0 {
return xerrors.Errorf("found invalid tags (tags that cannot be used for filter state in the Registry website): [%s]", strings.Join(invalidTags, ", "))
}
return nil
}
func validateCoderResourceFrontmatter(resourceType string, filePath string, fm coderResourceFrontmatter) []error {
if !slices.Contains(supportedResourceTypes, resourceType) {
return []error{xerrors.Errorf("cannot process unknown resource type %q", resourceType)}
}
var errs []error
if err := validateCoderResourceDisplayName(fm.DisplayName); err != nil {
errs = append(errs, addFilePathToError(filePath, err))
}
if err := validateCoderResourceDescription(fm.Description); err != nil {
errs = append(errs, addFilePathToError(filePath, err))
}
if err := validateCoderResourceTags(fm.Tags); err != nil {
errs = append(errs, addFilePathToError(filePath, err))
}
for _, err := range validateCoderResourceIconURL(fm.IconURL, filePath) {
errs = append(errs, addFilePathToError(filePath, err))
}
for _, err := range validateSupportedOperatingSystems(fm.OperatingSystems) {
errs = append(errs, addFilePathToError(filePath, err))
}
return errs
}
func parseCoderResourceReadme(resourceType string, rm readme) (coderResourceReadme, []error) {
fm, body, err := separateFrontmatter(rm.rawText)
if err != nil {
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)}
}
keyErrs := validateFrontmatterYamlKeys(fm, supportedCoderResourceStructKeys)
if len(keyErrs) != 0 {
var remapped []error
for _, e := range keyErrs {
remapped = append(remapped, addFilePathToError(rm.filePath, e))
}
return coderResourceReadme{}, remapped
}
yml := coderResourceFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)}
}
return coderResourceReadme{
resourceType: resourceType,
filePath: rm.filePath,
body: body,
frontmatter: yml,
}, nil
}
func parseCoderResourceReadmeFiles(resourceType string, rms []readme) ([]coderResourceReadme, error) {
if !slices.Contains(supportedResourceTypes, resourceType) {
return nil, xerrors.Errorf("cannot process unknown resource type %q", resourceType)
}
resources := map[string]coderResourceReadme{}
var yamlParsingErrs []error
for _, rm := range rms {
p, errs := parseCoderResourceReadme(resourceType, rm)
if len(errs) != 0 {
yamlParsingErrs = append(yamlParsingErrs, errs...)
continue
}
resources[p.filePath] = p
}
if len(yamlParsingErrs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadme,
errors: yamlParsingErrs,
}
}
var serialized []coderResourceReadme
for _, r := range resources {
serialized = append(serialized, r)
}
slices.SortFunc(serialized, func(r1 coderResourceReadme, r2 coderResourceReadme) int {
return strings.Compare(r1.filePath, r2.filePath)
})
return serialized, nil
}
// Todo: Need to beef up this function by grabbing each image/video URL from
// the body's AST.
func validateCoderResourceRelativeURLs(_ []coderResourceReadme) error {
return nil
}
func aggregateCoderResourceReadmeFiles(resourceType string) ([]readme, error) {
if !slices.Contains(supportedResourceTypes, resourceType) {
return nil, xerrors.Errorf("cannot process unknown resource type %q", resourceType)
}
registryFiles, err := os.ReadDir(rootRegistryPath)
if err != nil {
return nil, err
}
var allReadmeFiles []readme
var errs []error
for _, rf := range registryFiles {
if !rf.IsDir() {
continue
}
resourceRootPath := path.Join(rootRegistryPath, rf.Name(), resourceType)
resourceDirs, err := os.ReadDir(resourceRootPath)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
errs = append(errs, err)
}
continue
}
for _, rd := range resourceDirs {
if !rd.IsDir() || rd.Name() == ".coder" {
continue
}
resourceReadmePath := path.Join(resourceRootPath, rd.Name(), "README.md")
rm, err := os.ReadFile(resourceReadmePath)
if err != nil {
errs = append(errs, err)
continue
}
allReadmeFiles = append(allReadmeFiles, readme{
filePath: resourceReadmePath,
rawText: string(rm),
})
}
}
if len(errs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseFile,
errors: errs,
}
}
return allReadmeFiles, nil
}
func validateResourceGfmAlerts(readmeBody string) []error {
trimmed := strings.TrimSpace(readmeBody)
if trimmed == "" {
return nil
}
var errs []error
var sourceLine string
isInsideGfmQuotes := false
isInsideCodeBlock := false
lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
for lineScanner.Scan() {
sourceLine = lineScanner.Text()
if strings.HasPrefix(sourceLine, "```") {
isInsideCodeBlock = !isInsideCodeBlock
continue
}
if isInsideCodeBlock {
continue
}
isInsideGfmQuotes = isInsideGfmQuotes && strings.HasPrefix(sourceLine, "> ")
currentMatch := gfmAlertRegex.FindStringSubmatch(sourceLine)
if currentMatch == nil {
continue
}
// Nested GFM alerts is such a weird mistake that it's probably not really safe to keep trying to process the
// rest of the content, so this will prevent any other validations from happening for the given line
if isInsideGfmQuotes {
errs = append(errs, errors.New("registry does not support nested GFM alerts"))
continue
}
leadingWhitespace := currentMatch[1]
if len(leadingWhitespace) != 1 {
errs = append(errs, errors.New("GFM alerts must have one space between the '>' and the start of the GFM brackets"))
}
isInsideGfmQuotes = true
alertHeader := currentMatch[2]
upperHeader := strings.ToUpper(alertHeader)
if !slices.Contains(gfmAlertTypes, upperHeader) {
errs = append(errs, xerrors.Errorf("GFM alert type %q is not supported", alertHeader))
}
if alertHeader != upperHeader {
errs = append(errs, xerrors.Errorf("GFM alerts must be in all caps"))
}
trailingWhitespace := currentMatch[3]
if trailingWhitespace != "" {
errs = append(errs, xerrors.Errorf("GFM alerts must not have any trailing whitespace after the closing bracket"))
}
extraContent := currentMatch[4]
if extraContent != "" {
errs = append(errs, xerrors.Errorf("GFM alerts must not have any extra content on the same line"))
}
}
if gfmAlertRegex.Match([]byte(sourceLine)) {
errs = append(errs, xerrors.Errorf("README has an incomplete GFM alert at the end of the file"))
}
return errs
}