mirror of
https://github.com/lukaszraczylo/go-telegram.git
synced 2026-06-05 22:43:59 +00:00
3c04d7b0b1
The Telegram docs describe many string fields and parameters with
phrases like "can be ..., or ...", "must be one of ...", or "always X",
yet the generated Go API surface used raw `string` for every one of
them. Callers had to write magic strings or `string(api.ChatTypePrivate)`
to satisfy the field type. This change makes those fields typed Go
string enums emitted from the IR, so the IDE autocompletes valid values
and breaking-value drift surfaces at compile time.
Pipeline changes:
- internal/spec/ir.go: Field gains EnumValues []string. Empty for non-
enum fields; otherwise the wire-level values in doc order, deduped.
- cmd/scrape/enums.go: extractEnumValues recognises the curly-quoted
patterns Telegram uses ("can be either", "currently can be", "one
of", "must be", "always X") and rejects free-text quoted refs (e.g.
"Can be available only for X") via a tight gap check between the
trigger phrase and the first quoted value. parse_mode parameters
get the canonical Markdown / MarkdownV2 / HTML triple injected
because Telegram links to a separate formatting-options section
instead of listing values inline.
- cmd/genapi/enums.go: planEnums groups fields by sorted value-tuple,
picks a canonical Go enum name (most-common candidate, parent-
prefixed beats plain, shortest beats longer, alphabetical for
determinism), resolves cross-group name collisions by parent prefix.
- cmd/genapi/emitter.go + templates: goField rewrites the field type
to the planned enum name; multipartFieldEntry casts typed enum
values back to string when composing the wire map; enums.tmpl now
iterates the planned enums instead of hardcoding four hand-curated
ones; sentinelForField produces typed-constant test fixtures.
- api/enums.gen.go: regenerated from the live IR. 66 enum types, 155
constants. ParseMode, ChatType, MessageEntityType, ChatMember /
MessageOrigin / PaidMedia / Background / StoryAreaType / Reaction /
TransactionPartner / PassportElement variant Status & Type fields
are now typed.
- api/enums.go: hand-coded UpdateType (used by transport.LongPoller).
The Telegram docs do not enumerate Update payload kinds inline, so
the codegen pipeline cannot synthesise this enum.
- api/types.gen.go, api/methods.gen.go, api/methods_gen_test.go: 137
field declarations rewritten string -> typed enum.
- dispatch/, examples/: dropped every string(api.<Const>) cast. The
HasEntity filter now takes api.MessageEntityType; ChatType filter
compares typed values directly. ChatMember discriminator filter
casts variant.Status (typed per variant) to string for comparison.
- internal/spec/api.json, testdata/golden/*: regenerated and
refreshed. make regen-from-fixture is byte-deterministic across
runs.
Renames (no compat shims; v1 pre-public):
- EntityX -> MessageEntityTypeX (e.g. EntityBotCommand -> MessageEntityTypeBotCommand)
- EntityStrike -> MessageEntityTypeStrikethrough (full wire name)
237 lines
6.7 KiB
Go
237 lines
6.7 KiB
Go
package main
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
"github.com/lukaszraczylo/go-telegram/internal/spec"
|
|
)
|
|
|
|
// parseFieldsTable walks a <table> for an object-type definition.
|
|
// Columns: Field, Type, Description (optional column orders are not
|
|
// supported; Telegram's docs use a stable layout).
|
|
//
|
|
// Optional fields are detected via the "Optional." prefix in the
|
|
// description text, which is the documented convention.
|
|
func parseFieldsTable(t *html.Node) []spec.Field {
|
|
rows := tableRows(t)
|
|
if len(rows) == 0 {
|
|
return nil
|
|
}
|
|
var fields []spec.Field
|
|
for _, row := range rows[1:] { // skip header
|
|
cells := rowCells(row)
|
|
if len(cells) < 3 {
|
|
continue
|
|
}
|
|
jname := strings.TrimSpace(textOf(cells[0]))
|
|
typeText := strings.TrimSpace(textOf(cells[1]))
|
|
desc := strings.TrimSpace(textOf(cells[2]))
|
|
|
|
required := !strings.HasPrefix(desc, "Optional.")
|
|
tref := parseTypeRef(typeText)
|
|
var enumVals []string
|
|
if tref.Kind == spec.KindPrimitive && tref.Name == "string" {
|
|
enumVals = extractEnumValues(jname, desc)
|
|
}
|
|
fields = append(fields, spec.Field{
|
|
Name: goName(jname),
|
|
JSONName: jname,
|
|
Type: tref,
|
|
Required: required,
|
|
Doc: desc,
|
|
EnumValues: enumVals,
|
|
})
|
|
}
|
|
return fields
|
|
}
|
|
|
|
// parseParamsTable walks a <table> for a method definition.
|
|
// Columns: Parameter, Type, Required, Description.
|
|
func parseParamsTable(t *html.Node) []spec.Field {
|
|
rows := tableRows(t)
|
|
if len(rows) == 0 {
|
|
return nil
|
|
}
|
|
var params []spec.Field
|
|
for _, row := range rows[1:] {
|
|
cells := rowCells(row)
|
|
if len(cells) < 4 {
|
|
continue
|
|
}
|
|
jname := strings.TrimSpace(textOf(cells[0]))
|
|
typeText := strings.TrimSpace(textOf(cells[1]))
|
|
req := strings.EqualFold(strings.TrimSpace(textOf(cells[2])), "Yes")
|
|
desc := strings.TrimSpace(textOf(cells[3]))
|
|
|
|
tref := parseTypeRef(typeText)
|
|
var enumVals []string
|
|
if tref.Kind == spec.KindPrimitive && tref.Name == "string" {
|
|
enumVals = extractEnumValues(jname, desc)
|
|
}
|
|
params = append(params, spec.Field{
|
|
Name: goName(jname),
|
|
JSONName: jname,
|
|
Type: tref,
|
|
Required: req,
|
|
Doc: desc,
|
|
EnumValues: enumVals,
|
|
})
|
|
}
|
|
return params
|
|
}
|
|
|
|
// tableRows returns the <tr> children of a <table>, skipping over
|
|
// any <thead>/<tbody> wrappers.
|
|
func tableRows(t *html.Node) []*html.Node {
|
|
var rows []*html.Node
|
|
var visit func(*html.Node)
|
|
visit = func(n *html.Node) {
|
|
if n.Type == html.ElementNode && n.Data == "tr" {
|
|
rows = append(rows, n)
|
|
return
|
|
}
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
visit(c)
|
|
}
|
|
}
|
|
visit(t)
|
|
return rows
|
|
}
|
|
|
|
// rowCells returns the <td> (or <th>) children of a <tr>.
|
|
func rowCells(tr *html.Node) []*html.Node {
|
|
var cells []*html.Node
|
|
for c := tr.FirstChild; c != nil; c = c.NextSibling {
|
|
if c.Type == html.ElementNode && (c.Data == "td" || c.Data == "th") {
|
|
cells = append(cells, c)
|
|
}
|
|
}
|
|
return cells
|
|
}
|
|
|
|
// goName turns a snake_case JSON identifier into a PascalCase Go
// identifier.
//
// The input is split on "_" and empty segments are dropped. The
// segments id, url, ip, https, json and html are emitted fully
// upper-cased; any other segment gets its first byte upper-cased when
// that byte is an ASCII lowercase letter and is otherwise copied
// unchanged. An empty input yields an empty result.
func goName(s string) string {
	var out strings.Builder
	for _, seg := range strings.Split(s, "_") {
		if len(seg) == 0 {
			continue
		}
		switch seg {
		case "id", "url", "ip", "https", "json", "html":
			// Known acronyms are pure ASCII, so ToUpper is exact.
			out.WriteString(strings.ToUpper(seg))
			continue
		}
		first := seg[0]
		if 'a' <= first && first <= 'z' {
			first -= 'a' - 'A'
		}
		out.WriteByte(first)
		out.WriteString(seg[1:])
	}
	return out.String()
}
|
|
|
|
// parseTypeRef decodes the type-cell text into a spec.TypeRef.
|
|
//
|
|
// Recognised shapes:
|
|
//
|
|
// "Integer" → primitive int64
|
|
// "String" → primitive string
|
|
// "Boolean" / "True" → primitive bool
|
|
// "Float" / "Float number"→ primitive float64
|
|
// "Array of X" → array of (parseTypeRef of X)
|
|
// "Array of Array of X" → array of array of X
|
|
// "Foo" → named Foo
|
|
// "Foo or Bar" → oneOf {Foo, Bar}
|
|
// "InputFile or String" → oneOf (caller may translate to InputFile)
|
|
//
|
|
// parseTypeRef decodes the type-cell text into a spec.TypeRef.
|
|
//
|
|
// Recognised shapes:
|
|
//
|
|
// "Integer" → primitive int64
|
|
// "String" → primitive string
|
|
// "Boolean" / "True" → primitive bool
|
|
// "Float" / "Float number"→ primitive float64
|
|
// "Array of X" → array of (parseTypeRef of X)
|
|
// "Array of Array of X" → array of array of X
|
|
// "Foo" → named Foo
|
|
// "Foo or Bar" → oneOf {Foo, Bar}
|
|
// "Foo, Bar and Baz" → oneOf {Foo, Bar, Baz} (Telegram's comma+and union form)
|
|
// "InputFile or String" → oneOf (caller may translate to InputFile)
|
|
func parseTypeRef(s string) spec.TypeRef {
|
|
s = strings.TrimSpace(s)
|
|
// Array prefix.
|
|
if rest, ok := strings.CutPrefix(s, "Array of "); ok {
|
|
elem := parseTypeRef(rest)
|
|
return spec.TypeRef{Kind: spec.KindArray, ElemType: &elem}
|
|
}
|
|
// Comma-and union ("X, Y, Z and W") — used by Telegram for ≥3-variant unions.
|
|
if strings.Contains(s, ", ") && strings.Contains(s, " and ") {
|
|
parts := splitCommaAnd(s)
|
|
variants := make([]string, 0, len(parts))
|
|
for _, p := range parts {
|
|
variants = append(variants, primitiveOrNamed(strings.TrimSpace(p)).Name)
|
|
}
|
|
return spec.TypeRef{Kind: spec.KindOneOf, Variants: variants}
|
|
}
|
|
// "X or Y" union (the 2-variant form).
|
|
if strings.Contains(s, " or ") {
|
|
parts := strings.Split(s, " or ")
|
|
variants := make([]string, 0, len(parts))
|
|
for _, p := range parts {
|
|
variants = append(variants, primitiveOrNamed(strings.TrimSpace(p)).Name)
|
|
}
|
|
return spec.TypeRef{Kind: spec.KindOneOf, Variants: variants}
|
|
}
|
|
return primitiveOrNamed(s)
|
|
}
|
|
|
|
// splitCommaAnd splits a natural-language list into its items:
//
//	"A, B, C and D"  → ["A", "B", "C", "D"]
//	"A, B, and C"    → ["A", "B", "C"]   (Oxford comma)
//	"A and B"        → ["A", "B"]
//
// Every " and " is first rewritten to a comma separator, then the
// string is split on "," and each piece is whitespace-trimmed. Empty
// pieces are dropped, so doubled separators (as produced by an Oxford
// comma) never leak a stray "," into an item. The result is always a
// non-nil slice and is empty when s holds no items.
func splitCommaAnd(s string) []string {
	s = strings.ReplaceAll(s, " and ", ", ")
	// Split on the bare comma rather than ", " so that ",," sequences
	// yield an empty piece instead of an item with a trailing comma.
	parts := strings.Split(s, ",")
	out := make([]string, 0, len(parts))
	for _, p := range parts {
		if p = strings.TrimSpace(p); p != "" {
			out = append(out, p)
		}
	}
	return out
}
|
|
|
|
// primitiveOrNamed maps a single-word type cell to either a primitive
|
|
// or a named TypeRef. Unrecognised words are treated as named types.
|
|
func primitiveOrNamed(s string) spec.TypeRef {
|
|
switch s {
|
|
case "Integer", "Int":
|
|
return spec.TypeRef{Kind: spec.KindPrimitive, Name: "int64"}
|
|
case "String":
|
|
return spec.TypeRef{Kind: spec.KindPrimitive, Name: "string"}
|
|
case "Boolean", "Bool", "True", "False":
|
|
return spec.TypeRef{Kind: spec.KindPrimitive, Name: "bool"}
|
|
case "Float", "Float number":
|
|
return spec.TypeRef{Kind: spec.KindPrimitive, Name: "float64"}
|
|
default:
|
|
return spec.TypeRef{Kind: spec.KindNamed, Name: s}
|
|
}
|
|
}
|