registry/cmd/readmevalidation/coderresources.go
Michael Smith e94dfd2df6
fix: add validation for Github-Flavored Markdown Alerts (#394)
No issue to link – this was a problem we discovered while updating the
Registry website

## Description

This PR adds (very) basic validation for the GitHub Flavored Markdown
alerts that we allow contributors to add to their README files. The
errors that get generated should be correct, but the error messages
themselves aren't as helpful as they could be. I'm going to be handling
that in a separate PR, just so we can get this one in sooner.

### Changes made
- Added function for validating the core structure of all GFM alerts
- Updated existing README files that were failing the new validation
requirements

## Type of Change

- [ ] New module
- [x] Bug fix
- [ ] Feature/enhancement
- [ ] Documentation
- [ ] Other
2025-08-26 20:52:22 -05:00

356 lines
11 KiB
Go

package main
import (
"bufio"
"errors"
"net/url"
"os"
"path"
"regexp"
"slices"
"strings"
"golang.org/x/xerrors"
"gopkg.in/yaml.v3"
)
var (
supportedResourceTypes = []string{"modules", "templates"}
operatingSystems = []string{"windows", "macos", "linux"}
gfmAlertTypes = []string{"NOTE", "IMPORTANT", "CAUTION", "WARNING", "TIP"}
// TODO: This is a holdover from the validation logic used by the Coder Modules repo. It gives us some assurance, but
// realistically, we probably want to parse any Terraform code snippets, and make some deeper guarantees about how it's
// structured. Just validating whether it *can* be parsed as Terraform would be a big improvement.
terraformVersionRe = regexp.MustCompile(`^\s*\bversion\s+=`)
// Matches the format "> [!INFO]". Deliberately using a broad pattern to catch formatting issues that can mess up
// the renderer for the Registry website
gfmAlertRegex = regexp.MustCompile(`^>(\s*)\[!(\w+)\](\s*)(.*)`)
)
type coderResourceFrontmatter struct {
Description string `yaml:"description"`
IconURL string `yaml:"icon"`
DisplayName *string `yaml:"display_name"`
Verified *bool `yaml:"verified"`
Tags []string `yaml:"tags"`
OperatingSystems []string `yaml:"supported_os"`
}
// A slice version of the struct tags from coderResourceFrontmatter. Might be worth using reflection to generate this
// list at runtime in the future, but this should be okay for now
var supportedCoderResourceStructKeys = []string{
"description", "icon", "display_name", "verified", "tags", "supported_os",
// TODO: This is an old, officially deprecated key from the archived coder/modules repo. We can remove this once we
// make sure that the Registry Server is no longer checking this field.
"maintainer_github",
}
// coderResourceReadme represents a README describing a Terraform resource used
// to help create Coder workspaces. As of 2025-04-15, this encapsulates both
// Coder Modules and Coder Templates.
type coderResourceReadme struct {
resourceType string
filePath string
body string
frontmatter coderResourceFrontmatter
}
func validateSupportedOperatingSystems(systems []string) []error {
var errs []error
for _, s := range systems {
if slices.Contains(operatingSystems, s) {
continue
}
errs = append(errs, xerrors.Errorf("detected unknown operating system %q", s))
}
return errs
}
func validateCoderResourceDisplayName(displayName *string) error {
if displayName != nil && *displayName == "" {
return xerrors.New("if defined, display_name must not be empty string")
}
return nil
}
func validateCoderResourceDescription(description string) error {
if description == "" {
return xerrors.New("frontmatter description cannot be empty")
}
return nil
}
func isPermittedRelativeURL(checkURL string) bool {
// Would normally be skittish about having relative paths like this, but it should be safe because we have
// guarantees about the structure of the repo, and where this logic will run.
return strings.HasPrefix(checkURL, "./") || strings.HasPrefix(checkURL, "/") || strings.HasPrefix(checkURL, "../../../../.icons")
}
func validateCoderResourceIconURL(iconURL string) []error {
if iconURL == "" {
return []error{xerrors.New("icon URL cannot be empty")}
}
var errs []error
// If the URL does not have a relative path.
if !strings.HasPrefix(iconURL, ".") && !strings.HasPrefix(iconURL, "/") {
if _, err := url.ParseRequestURI(iconURL); err != nil {
errs = append(errs, xerrors.New("absolute icon URL is not correctly formatted"))
}
if strings.Contains(iconURL, "?") {
errs = append(errs, xerrors.New("icon URLs cannot contain query parameters"))
}
return errs
}
// If the URL has a relative path.
if !isPermittedRelativeURL(iconURL) {
errs = append(errs, xerrors.Errorf("relative icon URL %q must either be scoped to that module's directory, or the top-level /.icons directory (this can usually be done by starting the path with \"../../../.icons\")", iconURL))
}
return errs
}
func validateCoderResourceTags(tags []string) error {
if tags == nil {
return xerrors.New("provided tags array is nil")
}
if len(tags) == 0 {
return nil
}
// All of these tags are used for the module/template filter controls in the Registry site. Need to make sure they
// can all be placed in the browser URL without issue.
var invalidTags []string
for _, t := range tags {
if t != url.QueryEscape(t) {
invalidTags = append(invalidTags, t)
}
}
if len(invalidTags) != 0 {
return xerrors.Errorf("found invalid tags (tags that cannot be used for filter state in the Registry website): [%s]", strings.Join(invalidTags, ", "))
}
return nil
}
func validateCoderResourceFrontmatter(resourceType string, filePath string, fm coderResourceFrontmatter) []error {
if !slices.Contains(supportedResourceTypes, resourceType) {
return []error{xerrors.Errorf("cannot process unknown resource type %q", resourceType)}
}
var errs []error
if err := validateCoderResourceDisplayName(fm.DisplayName); err != nil {
errs = append(errs, addFilePathToError(filePath, err))
}
if err := validateCoderResourceDescription(fm.Description); err != nil {
errs = append(errs, addFilePathToError(filePath, err))
}
if err := validateCoderResourceTags(fm.Tags); err != nil {
errs = append(errs, addFilePathToError(filePath, err))
}
for _, err := range validateCoderResourceIconURL(fm.IconURL) {
errs = append(errs, addFilePathToError(filePath, err))
}
for _, err := range validateSupportedOperatingSystems(fm.OperatingSystems) {
errs = append(errs, addFilePathToError(filePath, err))
}
return errs
}
func parseCoderResourceReadme(resourceType string, rm readme) (coderResourceReadme, []error) {
fm, body, err := separateFrontmatter(rm.rawText)
if err != nil {
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)}
}
keyErrs := validateFrontmatterYamlKeys(fm, supportedCoderResourceStructKeys)
if len(keyErrs) != 0 {
var remapped []error
for _, e := range keyErrs {
remapped = append(remapped, addFilePathToError(rm.filePath, e))
}
return coderResourceReadme{}, remapped
}
yml := coderResourceFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)}
}
return coderResourceReadme{
resourceType: resourceType,
filePath: rm.filePath,
body: body,
frontmatter: yml,
}, nil
}
func parseCoderResourceReadmeFiles(resourceType string, rms []readme) ([]coderResourceReadme, error) {
if !slices.Contains(supportedResourceTypes, resourceType) {
return nil, xerrors.Errorf("cannot process unknown resource type %q", resourceType)
}
resources := map[string]coderResourceReadme{}
var yamlParsingErrs []error
for _, rm := range rms {
p, errs := parseCoderResourceReadme(resourceType, rm)
if len(errs) != 0 {
yamlParsingErrs = append(yamlParsingErrs, errs...)
continue
}
resources[p.filePath] = p
}
if len(yamlParsingErrs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadme,
errors: yamlParsingErrs,
}
}
var serialized []coderResourceReadme
for _, r := range resources {
serialized = append(serialized, r)
}
slices.SortFunc(serialized, func(r1 coderResourceReadme, r2 coderResourceReadme) int {
return strings.Compare(r1.filePath, r2.filePath)
})
return serialized, nil
}
// Todo: Need to beef up this function by grabbing each image/video URL from
// the body's AST.
func validateCoderResourceRelativeURLs(_ []coderResourceReadme) error {
return nil
}
func aggregateCoderResourceReadmeFiles(resourceType string) ([]readme, error) {
if !slices.Contains(supportedResourceTypes, resourceType) {
return nil, xerrors.Errorf("cannot process unknown resource type %q", resourceType)
}
registryFiles, err := os.ReadDir(rootRegistryPath)
if err != nil {
return nil, err
}
var allReadmeFiles []readme
var errs []error
for _, rf := range registryFiles {
if !rf.IsDir() {
continue
}
resourceRootPath := path.Join(rootRegistryPath, rf.Name(), resourceType)
resourceDirs, err := os.ReadDir(resourceRootPath)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
errs = append(errs, err)
}
continue
}
for _, rd := range resourceDirs {
if !rd.IsDir() || rd.Name() == ".coder" {
continue
}
resourceReadmePath := path.Join(resourceRootPath, rd.Name(), "README.md")
rm, err := os.ReadFile(resourceReadmePath)
if err != nil {
errs = append(errs, err)
continue
}
allReadmeFiles = append(allReadmeFiles, readme{
filePath: resourceReadmePath,
rawText: string(rm),
})
}
}
if len(errs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseFile,
errors: errs,
}
}
return allReadmeFiles, nil
}
func validateResourceGfmAlerts(readmeBody string) []error {
trimmed := strings.TrimSpace(readmeBody)
if trimmed == "" {
return nil
}
var errs []error
var sourceLine string
isInsideGfmQuotes := false
isInsideCodeBlock := false
lineScanner := bufio.NewScanner(strings.NewReader(trimmed))
for lineScanner.Scan() {
sourceLine = lineScanner.Text()
if strings.HasPrefix(sourceLine, "```") {
isInsideCodeBlock = !isInsideCodeBlock
continue
}
if isInsideCodeBlock {
continue
}
isInsideGfmQuotes = isInsideGfmQuotes && strings.HasPrefix(sourceLine, "> ")
currentMatch := gfmAlertRegex.FindStringSubmatch(sourceLine)
if currentMatch == nil {
continue
}
// Nested GFM alerts is such a weird mistake that it's probably not really safe to keep trying to process the
// rest of the content, so this will prevent any other validations from happening for the given line
if isInsideGfmQuotes {
errs = append(errs, errors.New("registry does not support nested GFM alerts"))
continue
}
leadingWhitespace := currentMatch[1]
if len(leadingWhitespace) != 1 {
errs = append(errs, errors.New("GFM alerts must have one space between the '>' and the start of the GFM brackets"))
}
isInsideGfmQuotes = true
alertHeader := currentMatch[2]
upperHeader := strings.ToUpper(alertHeader)
if !slices.Contains(gfmAlertTypes, upperHeader) {
errs = append(errs, xerrors.Errorf("GFM alert type %q is not supported", alertHeader))
}
if alertHeader != upperHeader {
errs = append(errs, xerrors.Errorf("GFM alerts must be in all caps"))
}
trailingWhitespace := currentMatch[3]
if trailingWhitespace != "" {
errs = append(errs, xerrors.Errorf("GFM alerts must not have any trailing whitespace after the closing bracket"))
}
extraContent := currentMatch[4]
if extraContent != "" {
errs = append(errs, xerrors.Errorf("GFM alerts must not have any extra content on the same line"))
}
}
if gfmAlertRegex.Match([]byte(sourceLine)) {
errs = append(errs, xerrors.Errorf("README has an incomplete GFM alert at the end of the file"))
}
return errs
}