mirror of
https://github.com/lukaszraczylo/go-telegram.git
synced 2026-06-05 22:43:59 +00:00
3c04d7b0b1
The Telegram docs describe many string fields and parameters with
phrases like "can be ..., or ...", "must be one of ...", or "always X",
yet the generated Go API surface used raw `string` for every one of
them. Callers had to write magic strings or `string(api.ChatTypePrivate)`
to satisfy the field type. This change makes those fields typed Go
string enums emitted from the IR, so the IDE autocompletes valid values
and breaking-value drift surfaces at compile time.
Pipeline changes:
- internal/spec/ir.go: Field gains EnumValues []string. Empty for non-
enum fields; otherwise the wire-level values in doc order, deduped.
- cmd/scrape/enums.go: extractEnumValues recognises the curly-quoted
patterns Telegram uses ("can be either", "currently can be", "one
of", "must be", "always X") and rejects free-text quoted refs (e.g.
"Can be available only for X") via a tight gap check between the
trigger phrase and the first quoted value. parse_mode parameters
get the canonical Markdown / MarkdownV2 / HTML triple injected
because Telegram links to a separate formatting-options section
instead of listing values inline.
- cmd/genapi/enums.go: planEnums groups fields by sorted value-tuple,
picks a canonical Go enum name (most-common candidate, parent-
prefixed beats plain, shortest beats longer, alphabetical for
determinism), resolves cross-group name collisions by parent prefix.
- cmd/genapi/emitter.go + templates: goField rewrites the field type
to the planned enum name; multipartFieldEntry casts typed enum
values back to string when composing the wire map; enums.tmpl now
iterates the planned enums instead of hardcoding four hand-curated
ones; sentinelForField produces typed-constant test fixtures.
- api/enums.gen.go: regenerated from the live IR. 66 enum types, 155
constants. ParseMode, ChatType, MessageEntityType, ChatMember /
MessageOrigin / PaidMedia / Background / StoryAreaType / Reaction /
TransactionPartner / PassportElement variant Status & Type fields
are now typed.
- api/enums.go: hand-coded UpdateType (used by transport.LongPoller).
The Telegram docs do not enumerate Update payload kinds inline, so
the codegen pipeline cannot synthesise this enum.
- api/types.gen.go, api/methods.gen.go, api/methods_gen_test.go: 137
field declarations rewritten string -> typed enum.
- dispatch/, examples/: dropped every string(api.<Const>) cast. The
HasEntity filter now takes api.MessageEntityType; ChatType filter
compares typed values directly. ChatMember discriminator filter
casts variant.Status (typed per variant) to string for comparison.
- internal/spec/api.json, testdata/golden/*: regenerated and
refreshed. make regen-from-fixture is byte-deterministic across
runs.
Renames (no compat shims; v1 pre-public):
- EntityX -> MessageEntityTypeX (e.g. EntityBotCommand -> MessageEntityTypeBotCommand)
- EntityStrike -> MessageEntityTypeStrikethrough (full wire name)
308 lines
7.8 KiB
Go
308 lines
7.8 KiB
Go
package main
|
|
|
|
import (
	"sort"
	"strconv"
	"strings"

	"github.com/lukaszraczylo/go-telegram/internal/spec"
)
|
|
|
|
// enumDecl is one generated enum: a named Go string type plus a set of
// named constants. Values are kept in the order they were recorded on the
// field that first contributed them; constant identifiers are derived
// from each value via constName.
type enumDecl struct {
	// Name is the Go identifier of the enum type.
	Name string
	// Values are the wire-level string values of the enum.
	Values []string
}
|
|
|
|
// enumPlan is the deduplicated, name-resolved set of enums emitted from
|
|
// an API IR. Lookup returns the enum name for a given field reference;
|
|
// All returns the deterministic-ordered list of declarations to emit.
|
|
type enumPlan struct {
|
|
// fieldKey -> enum name. The fieldKey is a string built by enumKey.
|
|
byField map[string]string
|
|
// enum name -> declaration.
|
|
decls map[string]enumDecl
|
|
}
|
|
|
|
// enumKey builds the lookup key that identifies a single field occurrence
// so the emitter can find its enum name later. The key is parent and
// fieldName joined by "::". Parent is "" for method params (the method
// doesn't share a Go type with the field).
func enumKey(parent, fieldName string) string {
	return parent + "::" + fieldName
}
|
|
|
|
// planEnums walks the IR, decides on enum names, deduplicates, and
|
|
// returns an enumPlan. All scraper-marked enum fields are covered.
|
|
func planEnums(api *spec.API) *enumPlan {
|
|
type ref struct {
|
|
parent string
|
|
fieldName string
|
|
jsonName string
|
|
values []string
|
|
valueKey string // canonical key for value-set dedup
|
|
}
|
|
|
|
var refs []ref
|
|
collect := func(parent string, fields []spec.Field) {
|
|
for _, f := range fields {
|
|
if len(f.EnumValues) == 0 {
|
|
continue
|
|
}
|
|
refs = append(refs, ref{
|
|
parent: parent,
|
|
fieldName: f.Name,
|
|
jsonName: f.JSONName,
|
|
values: f.EnumValues,
|
|
valueKey: valueKey(f.EnumValues),
|
|
})
|
|
}
|
|
}
|
|
for _, t := range api.Types {
|
|
collect(t.Name, t.Fields)
|
|
}
|
|
for _, m := range api.Methods {
|
|
// Method params have no shared Go parent type, so we pass "" as
|
|
// the parent. The default-name heuristic still produces the
|
|
// right answer for ParseMode-style enums.
|
|
collect("", m.Params)
|
|
}
|
|
|
|
// candidate name per ref (before collision resolution)
|
|
candidate := make([]string, len(refs))
|
|
for i, r := range refs {
|
|
candidate[i] = defaultEnumName(r.parent, r.jsonName, r.fieldName)
|
|
}
|
|
|
|
// Group by valueKey to coalesce identical value-sets across fields.
|
|
// Choose canonical name: prefer the most common candidate; tie-break
|
|
// by shortest name; final tie-break alphabetical.
|
|
type groupInfo struct {
|
|
values []string
|
|
name string
|
|
first int
|
|
}
|
|
groups := map[string]*groupInfo{}
|
|
for i, r := range refs {
|
|
g, ok := groups[r.valueKey]
|
|
if !ok {
|
|
groups[r.valueKey] = &groupInfo{values: r.values, first: i}
|
|
g = groups[r.valueKey]
|
|
}
|
|
_ = g
|
|
}
|
|
// Rank candidate names per group.
|
|
for vk := range groups {
|
|
counts := map[string]int{}
|
|
hasParent := map[string]bool{}
|
|
var names []string
|
|
for i, r := range refs {
|
|
if r.valueKey != vk {
|
|
continue
|
|
}
|
|
n := candidate[i]
|
|
if _, ok := counts[n]; !ok {
|
|
names = append(names, n)
|
|
}
|
|
counts[n]++
|
|
if r.parent != "" {
|
|
hasParent[n] = true
|
|
}
|
|
}
|
|
// Pick the canonical name for this group:
|
|
// 1. highest occurrence count wins;
|
|
// 2. names that originated from a parent type win over plain
|
|
// method-param candidates (avoids "Format"-style
|
|
// monosyllables);
|
|
// 3. shortest name wins;
|
|
// 4. alphabetical for full determinism.
|
|
sort.SliceStable(names, func(a, b int) bool {
|
|
if counts[names[a]] != counts[names[b]] {
|
|
return counts[names[a]] > counts[names[b]]
|
|
}
|
|
if hasParent[names[a]] != hasParent[names[b]] {
|
|
return hasParent[names[a]]
|
|
}
|
|
if len(names[a]) != len(names[b]) {
|
|
return len(names[a]) < len(names[b])
|
|
}
|
|
return names[a] < names[b]
|
|
})
|
|
groups[vk].name = names[0]
|
|
}
|
|
|
|
// Collision pass: two groups must not share the same enum name.
|
|
// When that happens, suffix the loser(s) with their parent type
|
|
// name so the result is unique. Iterate in deterministic order
|
|
// (groups sorted by valueKey).
|
|
used := map[string]string{} // name -> valueKey owner
|
|
var keys []string
|
|
for vk := range groups {
|
|
keys = append(keys, vk)
|
|
}
|
|
sort.Strings(keys)
|
|
for _, vk := range keys {
|
|
g := groups[vk]
|
|
if _, taken := used[g.name]; !taken {
|
|
used[g.name] = vk
|
|
continue
|
|
}
|
|
// Find a unique name by prepending a parent prefix from one of
|
|
// the contributing refs (the lowest-index ref in this group).
|
|
for i, r := range refs {
|
|
if r.valueKey != vk {
|
|
continue
|
|
}
|
|
if r.parent == "" {
|
|
continue
|
|
}
|
|
cand := r.parent + goNamePart(r.jsonName)
|
|
if _, taken := used[cand]; !taken {
|
|
g.name = cand
|
|
used[cand] = vk
|
|
goto next
|
|
}
|
|
_ = i
|
|
}
|
|
// Fallback: append a numeric disambiguator. Should not happen
|
|
// in practice for the Telegram docs but keeps the algorithm
|
|
// total.
|
|
for n := 2; ; n++ {
|
|
cand := groups[vk].name + itoa(n)
|
|
if _, taken := used[cand]; !taken {
|
|
g.name = cand
|
|
used[cand] = vk
|
|
break
|
|
}
|
|
}
|
|
next:
|
|
}
|
|
|
|
// Build the plan.
|
|
plan := &enumPlan{
|
|
byField: map[string]string{},
|
|
decls: map[string]enumDecl{},
|
|
}
|
|
for i, r := range refs {
|
|
name := groups[r.valueKey].name
|
|
plan.byField[enumKey(r.parent, r.fieldName)] = name
|
|
_ = i
|
|
}
|
|
for vk, g := range groups {
|
|
plan.decls[g.name] = enumDecl{Name: g.name, Values: g.values}
|
|
_ = vk
|
|
}
|
|
return plan
|
|
}
|
|
|
|
// All returns the enum declarations sorted by name for deterministic emit.
|
|
func (p *enumPlan) All() []enumDecl {
|
|
out := make([]enumDecl, 0, len(p.decls))
|
|
for _, d := range p.decls {
|
|
out = append(out, d)
|
|
}
|
|
sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name })
|
|
return out
|
|
}
|
|
|
|
// FieldEnum returns the enum name for a field on a given parent type
|
|
// (use parent="" for method parameters), or "" if the field is not an
|
|
// enum.
|
|
func (p *enumPlan) FieldEnum(parent, fieldName string) string {
|
|
if p == nil {
|
|
return ""
|
|
}
|
|
return p.byField[enumKey(parent, fieldName)]
|
|
}
|
|
|
|
// defaultEnumName picks an initial Go enum name for a field. parse_mode
|
|
// fields collapse to the canonical "ParseMode"; otherwise the name is
|
|
// parent + PascalCase(jsonName).
|
|
func defaultEnumName(parent, jsonName, fieldName string) string {
|
|
if strings.HasSuffix(jsonName, "parse_mode") {
|
|
return "ParseMode"
|
|
}
|
|
return parent + goNamePart(jsonName)
|
|
}
|
|
|
|
// constName builds a Go constant identifier "<EnumName><PascalValue>"
|
|
// from a wire value. Slashes (mime types) become "Of" so
|
|
// "image/jpeg" → "ImageOfJpeg".
|
|
func constName(enumName, value string) string {
|
|
return enumName + valuePascal(value)
|
|
}
|
|
|
|
func valuePascal(v string) string {
|
|
// "image/jpeg" → "ImageOfJpeg"
|
|
parts := strings.Split(v, "/")
|
|
for i, p := range parts {
|
|
parts[i] = goNamePart(p)
|
|
}
|
|
return strings.Join(parts, "Of")
|
|
}
|
|
|
|
// goNamePart converts a snake_case (or already-PascalCase) token to
// PascalCase. The input is split on "_"; empty segments are dropped, and
// each remaining segment has its first byte upper-cased when it is an
// ASCII lowercase letter (other segments are copied unchanged).
//
// The segments "id", "url", "ip", "https", "json" and "html" are
// upper-cased in full. This acronym list is meant to stay in sync with
// scrape/table.go so field names and constant names are capitalised the
// same way — verify against that file when either list changes.
func goNamePart(s string) string {
	if s == "" {
		return ""
	}
	var b strings.Builder
	for _, p := range strings.Split(s, "_") {
		if p == "" {
			continue
		}
		switch p {
		case "id", "url", "ip", "https", "json", "html":
			b.WriteString(strings.ToUpper(p))
			continue
		}
		if c := p[0]; c >= 'a' && c <= 'z' {
			b.WriteByte(c - 'a' + 'A')
			b.WriteString(p[1:])
		} else {
			b.WriteString(p)
		}
	}
	return b.String()
}
|
|
|
|
// valueKey returns a canonical key for a set of enum values: the values
// sorted and joined with a NUL byte. Two slices holding the same values
// in a different order produce the same key. The input slice is copied
// before sorting, so the caller's order is left untouched.
func valueKey(values []string) string {
	sorted := make([]string, len(values))
	copy(sorted, values)
	sort.Strings(sorted)
	return strings.Join(sorted, "\x00")
}
|
|
|
|
// itoa returns the base-10 string form of n. Negative values are rendered
// with a leading minus sign.
func itoa(n int) string {
	return strconv.Itoa(n)
}
|