Files
go-telegram/cmd/genapi/enums.go
T
lukaszraczylo 3c04d7b0b1 feat(api): typed enums for all string-enum fields
The Telegram docs describe many string fields and parameters with
phrases like "can be ..., or ...", "must be one of ...", or "always X",
yet the generated Go API surface used raw `string` for every one of
them. Callers had to write magic strings or `string(api.ChatTypePrivate)`
to satisfy the field type. This change makes those fields typed Go
string enums emitted from the IR, so the IDE autocompletes valid values
and breaking-value drift surfaces at compile time.

Pipeline changes:

- internal/spec/ir.go: Field gains EnumValues []string. Empty for non-
  enum fields; otherwise the wire-level values in doc order, deduped.

- cmd/scrape/enums.go: extractEnumValues recognises the curly-quoted
  patterns Telegram uses ("can be either", "currently can be", "one
  of", "must be", "always X") and rejects free-text quoted refs (e.g.
  "Can be available only for X") via a tight gap check between the
  trigger phrase and the first quoted value. parse_mode parameters
  get the canonical Markdown / MarkdownV2 / HTML triple injected
  because Telegram links to a separate formatting-options section
  instead of listing values inline.

- cmd/genapi/enums.go: planEnums groups fields by sorted value-tuple,
  picks a canonical Go enum name (most-common candidate, parent-
  prefixed beats plain, shortest beats longer, alphabetical for
  determinism), resolves cross-group name collisions by parent prefix.

- cmd/genapi/emitter.go + templates: goField rewrites the field type
  to the planned enum name; multipartFieldEntry casts typed enum
  values back to string when composing the wire map; enums.tmpl now
  iterates the planned enums instead of hardcoding four hand-curated
  ones; sentinelForField produces typed-constant test fixtures.

- api/enums.gen.go: regenerated from the live IR. 66 enum types, 155
  constants. ParseMode, ChatType, MessageEntityType, ChatMember /
  MessageOrigin / PaidMedia / Background / StoryAreaType / Reaction /
  TransactionPartner / PassportElement variant Status & Type fields
  are now typed.

- api/enums.go: hand-coded UpdateType (used by transport.LongPoller).
  The Telegram docs do not enumerate Update payload kinds inline, so
  the codegen pipeline cannot synthesise this enum.

- api/types.gen.go, api/methods.gen.go, api/methods_gen_test.go: 137
  field declarations rewritten string -> typed enum.

- dispatch/, examples/: dropped every string(api.<Const>) cast. The
  HasEntity filter now takes api.MessageEntityType; ChatType filter
  compares typed values directly. ChatMember discriminator filter
  casts variant.Status (typed per variant) to string for comparison.

- internal/spec/api.json, testdata/golden/*: regenerated and
  refreshed. make regen-from-fixture is byte-deterministic across
  runs.

Renames (no compat shims; v1 pre-public):
- EntityX  -> MessageEntityTypeX  (e.g. EntityBotCommand -> MessageEntityTypeBotCommand)
- EntityStrike -> MessageEntityTypeStrikethrough (full wire name)
2026-05-09 17:55:34 +01:00

308 lines
7.8 KiB
Go

package main
import (
	"sort"
	"strconv"
	"strings"

	"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// enumDecl describes a single enum to be generated: the Go type name
// and the wire-level string values it may take. Values are stored in
// the order they were supplied by the IR; the identifier of each
// generated constant is derived from the enum name and the value via
// constName.
type enumDecl struct {
	// Name is the Go identifier of the enum type.
	Name string
	// Values are the raw wire values, one generated constant each.
	Values []string
}
// enumPlan is the result of planEnums: every enum that will be emitted,
// already deduplicated and given its final name. FieldEnum answers
// "which enum does this field use?"; All lists the declarations in a
// deterministic order.
type enumPlan struct {
	// byField maps a field key (built by enumKey) to the enum name
	// chosen for that field.
	byField map[string]string
	// decls maps an enum name to its declaration.
	decls map[string]enumDecl
}
// enumKey builds the lookup key for one field occurrence, in the form
// "<parent>::<fieldName>". planEnums passes an empty parent for method
// parameters, so their keys start with "::".
func enumKey(parent, fieldName string) string {
	const sep = "::"
	return parent + sep + fieldName
}
// planEnums walks the IR and builds the enumPlan used by the emitter.
//
// Every type field or method parameter with a non-empty EnumValues list
// becomes a reference. References whose value sets are equal (order
// ignored, see valueKey) are coalesced into one enum; its Values slice
// is taken from the first such reference, with types visited before
// methods.
//
// The enum name is chosen in two passes:
//
//  1. Ranking. Among the defaultEnumName candidates of a group the most
//     frequent one wins; ties go to a candidate contributed by a type
//     field (non-empty parent), then to the shortest candidate, then to
//     the alphabetically first one.
//  2. Collision resolution. Groups are visited in sorted valueKey order
//     and the first group to claim a name keeps it. A later group gets
//     "<Parent><PascalJSONName>" from its first parented reference whose
//     name is still free, or, failing that, its ranked name followed by
//     2, 3, ...
//
// NOTE(review): method parameters are all keyed with parent "", so two
// methods declaring a same-named parameter with different value sets
// overwrite each other in byField (the later method wins). Confirm this
// cannot happen for the IR being generated from.
func planEnums(api *spec.API) *enumPlan {
	// ref is one enum-valued field occurrence together with its
	// pre-collision candidate name.
	type ref struct {
		parent    string
		fieldName string
		jsonName  string
		values    []string
		valueKey  string // canonical key for value-set dedup
		candidate string
	}
	var refs []ref
	collect := func(parent string, fields []spec.Field) {
		for _, f := range fields {
			if len(f.EnumValues) == 0 {
				continue
			}
			refs = append(refs, ref{
				parent:    parent,
				fieldName: f.Name,
				jsonName:  f.JSONName,
				values:    f.EnumValues,
				valueKey:  valueKey(f.EnumValues),
				candidate: defaultEnumName(parent, f.JSONName, f.Name),
			})
		}
	}
	for _, t := range api.Types {
		collect(t.Name, t.Fields)
	}
	for _, m := range api.Methods {
		// Method params have no shared Go parent type, so they are
		// collected with an empty parent.
		collect("", m.Params)
	}

	// Coalesce references with identical value sets. Each group keeps
	// the indices of its members so later passes never rescan refs.
	type group struct {
		values  []string
		name    string
		members []int // indices into refs, ascending
	}
	groups := map[string]*group{}
	var keys []string
	for i := range refs {
		vk := refs[i].valueKey
		g, ok := groups[vk]
		if !ok {
			g = &group{values: refs[i].values}
			groups[vk] = g
			keys = append(keys, vk)
		}
		g.members = append(g.members, i)
	}
	// Sorted keys make the collision pass below deterministic.
	sort.Strings(keys)

	// Ranking pass: pick the preferred candidate name of every group.
	for _, vk := range keys {
		g := groups[vk]
		counts := map[string]int{}
		hasParent := map[string]bool{}
		var names []string
		for _, i := range g.members {
			r := refs[i]
			if _, seen := counts[r.candidate]; !seen {
				names = append(names, r.candidate)
			}
			counts[r.candidate]++
			if r.parent != "" {
				hasParent[r.candidate] = true
			}
		}
		sort.SliceStable(names, func(a, b int) bool {
			na, nb := names[a], names[b]
			if counts[na] != counts[nb] {
				return counts[na] > counts[nb]
			}
			if hasParent[na] != hasParent[nb] {
				// Parented names beat plain method-param candidates.
				return hasParent[na]
			}
			if len(na) != len(nb) {
				return len(na) < len(nb)
			}
			return na < nb
		})
		g.name = names[0]
	}

	// Collision pass: no two groups may end up with the same name.
	used := make(map[string]bool, len(keys))
	// disambiguate returns a name for g that no earlier group claimed.
	disambiguate := func(g *group) string {
		for _, i := range g.members {
			r := refs[i]
			if r.parent == "" {
				continue
			}
			if cand := r.parent + goNamePart(r.jsonName); !used[cand] {
				return cand
			}
		}
		// No parented reference produced a free name; fall back to a
		// numeric disambiguator so the algorithm always terminates
		// with a unique name.
		for n := 2; ; n++ {
			if cand := g.name + itoa(n); !used[cand] {
				return cand
			}
		}
	}
	for _, vk := range keys {
		g := groups[vk]
		if used[g.name] {
			g.name = disambiguate(g)
		}
		used[g.name] = true
	}

	// Build the plan.
	plan := &enumPlan{
		byField: make(map[string]string, len(refs)),
		decls:   make(map[string]enumDecl, len(groups)),
	}
	for _, r := range refs {
		plan.byField[enumKey(r.parent, r.fieldName)] = groups[r.valueKey].name
	}
	for _, g := range groups {
		plan.decls[g.name] = enumDecl{Name: g.name, Values: g.values}
	}
	return plan
}
// All returns every planned enum declaration sorted by Name, so the
// emitted output is deterministic. A nil plan yields nil, matching the
// nil-safety of FieldEnum.
func (p *enumPlan) All() []enumDecl {
	if p == nil {
		return nil
	}
	out := make([]enumDecl, 0, len(p.decls))
	for _, d := range p.decls {
		out = append(out, d)
	}
	sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name })
	return out
}
// FieldEnum looks up the enum name planned for fieldName on the given
// parent type. Method parameters are looked up with parent "". The
// result is "" when the field has no planned enum or when the plan
// itself is nil.
func (p *enumPlan) FieldEnum(parent, fieldName string) string {
	if p != nil {
		if name, ok := p.byField[enumKey(parent, fieldName)]; ok {
			return name
		}
	}
	return ""
}
// defaultEnumName proposes the initial Go enum name for a field. Any
// field whose JSON name ends in "parse_mode" maps to the shared name
// "ParseMode"; every other field gets parent followed by
// goNamePart(jsonName). fieldName is accepted but not consulted.
func defaultEnumName(parent, jsonName, fieldName string) string {
	const parseModeSuffix = "parse_mode"
	switch {
	case strings.HasSuffix(jsonName, parseModeSuffix):
		return "ParseMode"
	default:
		return parent + goNamePart(jsonName)
	}
}
// constName returns the Go identifier for one enum constant: the enum
// type name followed by the value converted with valuePascal (for
// example "image/jpeg" contributes "ImageOfJpeg").
func constName(enumName, value string) string {
	suffix := valuePascal(value)
	return enumName + suffix
}
// valuePascal turns a wire value into the PascalCase tail of a constant
// name. The value is split on "/", each segment goes through
// goNamePart, and the segments are rejoined with "Of", so
// "image/jpeg" becomes "ImageOfJpeg".
func valuePascal(v string) string {
	segments := strings.Split(v, "/")
	converted := make([]string, 0, len(segments))
	for _, seg := range segments {
		converted = append(converted, goNamePart(seg))
	}
	return strings.Join(converted, "Of")
}
// goNamePart converts a snake_case token to PascalCase. The input is
// split on "_" and empty segments are dropped. A segment that is exactly
// one of the lowercase acronyms id, url, ip, https, json or html is
// written fully upper-cased. Any other segment has its first byte
// upper-cased when that byte is an ASCII lowercase letter and is
// otherwise copied unchanged, so already-PascalCase input passes
// through as is.
//
// NOTE(review): the acronym list is meant to match the one used for
// field names in scrape/table.go; confirm the two are still in sync.
func goNamePart(s string) string {
	var out strings.Builder
	for _, word := range strings.Split(s, "_") {
		switch word {
		case "":
			// Consecutive, leading or trailing underscores add nothing.
		case "id", "url", "ip", "https", "json", "html":
			out.WriteString(strings.ToUpper(word))
		default:
			if first := word[0]; 'a' <= first && first <= 'z' {
				out.WriteByte(first - ('a' - 'A'))
				out.WriteString(word[1:])
			} else {
				out.WriteString(word)
			}
		}
	}
	return out.String()
}
// valueKey returns a canonical key for a set of enum values: the values
// sorted and joined with a NUL byte. Two slices holding the same values
// in a different order therefore produce the same key. The caller's
// slice is not modified.
func valueKey(values []string) string {
	sorted := append([]string(nil), values...)
	sort.Strings(sorted)
	return strings.Join(sorted, "\x00")
}
// itoa returns the base-10 representation of n. It delegates to
// strconv.Itoa so that negative numbers are rendered with a leading
// minus sign instead of collapsing to an empty string.
func itoa(n int) string {
	return strconv.Itoa(n)
}