Initial release of go-telegram

A fully-generated, strongly-typed Go client for the Telegram Bot API.

* 176 methods + 301 types generated from Bot API v10.0
* 1408 auto-generated tests (8 scenarios per method)
* Typed unions throughout — no 'any' in the public surface
* Pluggable HTTP transport and JSON codec (default goccy/go-json)
* Built-in retry middleware honouring Telegram's retry_after
* Generic dispatcher with filters and conversation handlers
* Self-verifying codegen pipeline (regen → audit → emit → run tests)
* 14 example bots covering common patterns
This commit is contained in:
2026-05-09 13:09:27 +01:00
commit 9072e9eafb
167 changed files with 106860 additions and 0 deletions
+211
View File
@@ -0,0 +1,211 @@
package main
import (
"testing"
"github.com/lukaszraczylo/go-telegram/internal/spec"
"github.com/stretchr/testify/require"
)
// ---------------------------------------------------------------------------
// parseTypeRef — edge cases
// ---------------------------------------------------------------------------
func TestParseTypeRef_Empty(t *testing.T) {
// Empty string → named with empty name (fallback).
got := parseTypeRef("")
require.Equal(t, spec.KindNamed, got.Kind)
require.Equal(t, "", got.Name)
}
func TestParseTypeRef_Whitespace(t *testing.T) {
got := parseTypeRef(" Integer ")
require.Equal(t, spec.KindPrimitive, got.Kind)
require.Equal(t, "int64", got.Name)
}
func TestParseTypeRef_True(t *testing.T) {
got := parseTypeRef("True")
require.Equal(t, spec.KindPrimitive, got.Kind)
require.Equal(t, "bool", got.Name)
}
func TestParseTypeRef_False(t *testing.T) {
got := parseTypeRef("False")
require.Equal(t, spec.KindPrimitive, got.Kind)
require.Equal(t, "bool", got.Name)
}
func TestParseTypeRef_FloatNumber(t *testing.T) {
got := parseTypeRef("Float number")
require.Equal(t, spec.KindPrimitive, got.Kind)
require.Equal(t, "float64", got.Name)
}
func TestParseTypeRef_Int(t *testing.T) {
got := parseTypeRef("Int")
require.Equal(t, spec.KindPrimitive, got.Kind)
require.Equal(t, "int64", got.Name)
}
func TestParseTypeRef_Bool(t *testing.T) {
got := parseTypeRef("Bool")
require.Equal(t, spec.KindPrimitive, got.Kind)
require.Equal(t, "bool", got.Name)
}
// TestParseTypeRef_CommaAndUnion checks that the "Foo, Bar and Baz"
// spelling becomes a three-variant oneOf containing every listed name.
func TestParseTypeRef_CommaAndUnion(t *testing.T) {
	ref := parseTypeRef("InputMediaPhoto, InputMediaVideo and InputMediaDocument")
	require.Equal(t, spec.KindOneOf, ref.Kind)
	require.Len(t, ref.Variants, 3)
	for _, want := range []string{"InputMediaPhoto", "InputMediaVideo", "InputMediaDocument"} {
		require.Contains(t, ref.Variants, want)
	}
}
// TestParseTypeRef_ArrayOfNothing pins what happens to a dangling
// "Array of " cell. Trimming turns it into "Array of", which no longer
// carries the "Array of " prefix, so the input is treated as a plain
// named type instead of an array.
func TestParseTypeRef_ArrayOfNothing(t *testing.T) {
	ref := parseTypeRef("Array of ")
	require.Equal(t, spec.KindNamed, ref.Kind)
}
// ---------------------------------------------------------------------------
// splitCommaAnd
// ---------------------------------------------------------------------------
func TestSplitCommaAnd_ThreeVariants(t *testing.T) {
got := splitCommaAnd("A, B and C")
require.Equal(t, []string{"A", "B", "C"}, got)
}
func TestSplitCommaAnd_FourVariants(t *testing.T) {
got := splitCommaAnd("A, B, C and D")
require.Equal(t, []string{"A", "B", "C", "D"}, got)
}
// TestSplitCommaAnd_ExtraSpaces checks that padding around the items is
// trimmed away. Comparing the exact slice (not just its length) is what
// proves the stray spaces are gone.
func TestSplitCommaAnd_ExtraSpaces(t *testing.T) {
	got := splitCommaAnd(" Foo , Bar and Baz ")
	require.Equal(t, []string{"Foo", "Bar", "Baz"}, got)
}
// ---------------------------------------------------------------------------
// goName — edge cases
// ---------------------------------------------------------------------------
func TestGoName_Empty(t *testing.T) {
require.Equal(t, "", goName(""))
}
func TestGoName_SingleWord(t *testing.T) {
require.Equal(t, "Photo", goName("photo"))
}
func TestGoName_JSON(t *testing.T) {
require.Equal(t, "JSON", goName("json"))
}
func TestGoName_HTML(t *testing.T) {
require.Equal(t, "HTML", goName("html"))
}
func TestGoName_HTTPS(t *testing.T) {
require.Equal(t, "HTTPS", goName("https"))
}
// TestGoName_AlreadyUpperSegment checks that a segment which already
// starts with an upper-case letter is copied through unchanged, and that
// the ordinary lower-case acronym path still works next to it.
func TestGoName_AlreadyUpperSegment(t *testing.T) {
	// All-lowercase input: the trailing "id" takes the acronym path.
	require.Equal(t, "MediaGroupID", goName("media_group_id"))
	// Segments that are already capitalised must not be altered.
	require.Equal(t, "WebhookURL", goName("webhook_URL"))
	require.Equal(t, "MediaGroup", goName("Media_Group"))
}
// ---------------------------------------------------------------------------
// extractReturn — additional patterns
// ---------------------------------------------------------------------------
// TestExtractReturn_ArrayPattern checks "Returns an Array of X objects".
func TestExtractReturn_ArrayPattern(t *testing.T) {
	ref := extractReturn("Returns an Array of Update objects.")
	require.Equal(t, spec.KindArray, ref.Kind)
	require.Equal(t, "Update", ref.ElemType.Name)
}
// TestExtractReturn_BoolPattern checks "Returns True on success".
func TestExtractReturn_BoolPattern(t *testing.T) {
	ref := extractReturn("Returns True on success.")
	require.Equal(t, spec.KindPrimitive, ref.Kind)
	require.Equal(t, "bool", ref.Name)
}
// TestExtractReturn_OnSuccessTrueIsReturned checks the lower-case
// "true is returned" phrasing.
func TestExtractReturn_OnSuccessTrueIsReturned(t *testing.T) {
	ref := extractReturn("On success, true is returned.")
	require.Equal(t, spec.KindPrimitive, ref.Kind)
	require.Equal(t, "bool", ref.Name)
}
// TestExtractReturn_NamedObject checks "returns a X object".
func TestExtractReturn_NamedObject(t *testing.T) {
	ref := extractReturn("On success, returns a Message object.")
	require.Equal(t, spec.KindNamed, ref.Kind)
	require.Equal(t, "Message", ref.Name)
}
// TestExtractReturn_MessageOrBool checks that the "X is returned,
// otherwise True is returned" phrasing yields the XOrBool sentinel name.
func TestExtractReturn_MessageOrBool(t *testing.T) {
	ref := extractReturn("On success, the edited Message is returned, otherwise True is returned.")
	require.Equal(t, spec.KindNamed, ref.Kind)
	require.Equal(t, "MessageOrBool", ref.Name)
}
// TestExtractReturn_InFormOf checks the "in form of a X" phrasing.
func TestExtractReturn_InFormOf(t *testing.T) {
	ref := extractReturn("The answer is provided in form of a ChatInviteLink object.")
	require.Equal(t, spec.KindNamed, ref.Kind)
	require.Equal(t, "ChatInviteLink", ref.Name)
}
// TestExtractReturn_Fallback checks that prose with no recognised return
// phrase falls back to bool.
func TestExtractReturn_Fallback(t *testing.T) {
	ref := extractReturn("This method does something interesting.")
	require.Equal(t, spec.KindPrimitive, ref.Kind)
	require.Equal(t, "bool", ref.Name)
}
// TestExtractReturn_MultipleReturnsFirstWins pins the priority rule when a
// description holds more than one return phrase. Patterns are tried in
// list order, not in text order: the indefinite-article pattern
// ("Returns a X object") sits ahead of "Returns True", so it wins even
// though "Returns True" appears first in the sentence.
func TestExtractReturn_MultipleReturnsFirstWins(t *testing.T) {
	const prose = "Returns True on success. You can also Returns a Message object later."
	ref := extractReturn(prose)
	// Expect the named Message, not bool.
	require.Equal(t, spec.KindNamed, ref.Kind)
	require.Equal(t, "Message", ref.Name)
}
// ---------------------------------------------------------------------------
// extractVersion
// ---------------------------------------------------------------------------
// TestExtractVersion_InTitle checks a version found in a section heading.
func TestExtractVersion_InTitle(t *testing.T) {
	only := section{Title: "Bot API 7.3", Description: ""}
	require.Equal(t, "7.3", extractVersion([]section{only}))
}
// TestExtractVersion_InDescription checks a version found in the section
// text when the heading is only a date.
func TestExtractVersion_InDescription(t *testing.T) {
	only := section{Title: "April 2024", Description: "Released Bot API 7.2."}
	require.Equal(t, "7.2", extractVersion([]section{only}))
}
// TestExtractVersion_NotFound checks that an empty string comes back when
// no section mentions a version.
func TestExtractVersion_NotFound(t *testing.T) {
	only := section{Title: "Introduction", Description: "Welcome to the API."}
	require.Equal(t, "", extractVersion([]section{only}))
}
+77
View File
@@ -0,0 +1,77 @@
// Command scrape parses the Telegram Bot API HTML page into the IR
// (internal/spec.API) and writes it to internal/spec/api.json.
//
// Usage:
//
//	scrape -input <file>     (read HTML from local file; overrides -url)
//	scrape -url <url>        (fetch HTML from URL; default: live docs)
//	scrape -output <file>    (output path; default: internal/spec/api.json)
//	scrape -overrides <file> (overrides JSON; default: internal/spec/overrides.json)
package main
import (
"errors"
"flag"
"fmt"
"io"
"net/http"
"os"
"time"
"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// defaultURL is the page fetched when neither -input nor -url is given.
const defaultURL = "https://core.telegram.org/bots/api"

// main parses the command-line flags and runs the scraper. Any failure is
// printed to stderr with a "scrape:" prefix and the process exits with
// status 1.
func main() {
	var (
		inputFile     = flag.String("input", "", "local HTML file (overrides -url)")
		sourceURL     = flag.String("url", defaultURL, "URL to fetch HTML from")
		outputFile    = flag.String("output", "internal/spec/api.json", "output path")
		overridesFile = flag.String("overrides", "internal/spec/overrides.json", "path to overrides JSON")
	)
	flag.Parse()

	err := run(*inputFile, *sourceURL, *outputFile, *overridesFile)
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, "scrape:", err)
	os.Exit(1)
}
// run executes the pipeline end to end: obtain the HTML (from input if
// set, otherwise from url), scrape it into the IR, load the overrides
// file and apply it to the IR, then write the result to output as JSON.
// Each failing stage is wrapped with a short prefix naming that stage.
func run(input, url, output, overridesPath string) error {
	page, err := readHTML(input, url)
	if err != nil {
		return fmt.Errorf("read html: %w", err)
	}

	parsed, err := scrape(page)
	if err != nil {
		return fmt.Errorf("scrape: %w", err)
	}

	patch, err := spec.LoadOverrides(overridesPath)
	if err != nil {
		return fmt.Errorf("load overrides: %w", err)
	}
	patch.Apply(parsed)

	return writeJSON(output, parsed)
}
// readHTML returns the raw docs HTML.
//
// A non-empty input names a local file and takes precedence over url.
// Otherwise the page is fetched with a GET request under a 30-second
// client timeout. Any response other than 200 OK is an error that names
// the URL and the status line, so the failure is understandable without
// knowing which flags were passed.
func readHTML(input, url string) ([]byte, error) {
	if input != "" {
		return os.ReadFile(input)
	}
	c := &http.Client{Timeout: 30 * time.Second}
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "go-telegram codegen scraper")
	resp, err := c.Do(req)
	if err != nil {
		return nil, err
	}
	// The body is only read; a Close error carries no useful information.
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("GET " + url + ": unexpected status " + resp.Status)
	}
	return io.ReadAll(resp.Body)
}
+149
View File
@@ -0,0 +1,149 @@
package main
import (
"regexp"
"strings"
"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// returnPattern pairs a regular expression over a method description with
// the function that turns its submatches into a spec.TypeRef.
type returnPattern struct {
	re *regexp.Regexp
	fn func([]string) spec.TypeRef
}

// returnPatterns holds the return-phrase matchers in priority order. They
// are compiled once at package initialisation instead of on every
// extractReturn call.
var returnPatterns = func() []returnPattern {
	// arrayOf builds an array whose element is the captured type name.
	arrayOf := func(m []string) spec.TypeRef {
		elem := primitiveOrNamed(m[1])
		return spec.TypeRef{Kind: spec.KindArray, ElemType: &elem}
	}
	// single resolves the captured type name on its own.
	single := func(m []string) spec.TypeRef {
		return primitiveOrNamed(m[1])
	}
	// orBool names the "X or True" conditional-return sentinel.
	orBool := func(m []string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindNamed, Name: m[1] + "OrBool"}
	}
	// boolean ignores the match; the phrase itself means bool.
	boolean := func([]string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindPrimitive, Name: "bool"}
	}

	return []returnPattern{
		// Array patterns first — most specific.
		{regexp.MustCompile(`Returns an? [Aa]rray of ([A-Z][A-Za-z0-9]+)`), arrayOf},
		{regexp.MustCompile(`On success(?:,)?\s+(?:an?\s+)?[Aa]rray of ([A-Z][A-Za-z0-9]+)(?:\s+objects?)?\s+(?:is|are|that\s+\S+\s+\S+\s+)?(?:is |are )?returned`), arrayOf},
		// "an array of X of the sent messages is returned" (ForwardMessages/CopyMessages shape).
		{regexp.MustCompile(`(?:[Oo]n success[,.]?\s+)?an? array of ([A-Z][A-Za-z0-9]+)(?:\s+of [^.]+?)?\s+(?:objects\s+)?(?:is|are) returned`), arrayOf},
		// "Message or True" conditional return → XOrBool sentinel.
		{regexp.MustCompile(`the (?:edited|sent|stopped)?\s*([A-Z][A-Za-z0-9]+)\s+is returned, otherwise (?:True|true) is returned`), orBool},
		// "Returns ... as a X object" / "Returns ... as X object" (with or without article).
		{regexp.MustCompile(`[Rr]eturns? (?:.+? )?as (?:an? )?([A-Z][A-Za-z0-9]+) object`), single},
		// "Returns ... as String on success" / "Returns ... as X on success" (named type after "as").
		{regexp.MustCompile(`[Rr]eturns? (?:.+? )?as ([A-Z][A-Za-z0-9]+) on success`), single},
		// Indefinite article: "On success, returns a X object" / "Returns a X object".
		{regexp.MustCompile(`(?:[Oo]n success[,.]?\s+)?[Rr]eturns? an? ([A-Z][A-Za-z0-9]+)(?:\s+object)?`), single},
		// "On success, an? X is returned" / "On success, the stopped X is returned".
		{regexp.MustCompile(`On success,\s+(?:an?|the)?\s*(?:[a-z]+\s+)?([A-Z][A-Za-z0-9]+)(?:\s+object)?\s+is returned`), single},
		// Explicit True — must come before the broad "Returns X" pattern.
		{regexp.MustCompile(`Returns True`), boolean},
		{regexp.MustCompile(`(?i)on success, true is returned`), boolean},
		// "Returns the verb-ed X" — accepts any verb prefix (uploaded, revoked, …).
		{regexp.MustCompile(`Returns (?:the|an?)\s+(?:[a-z]+ )?([A-Z][A-Za-z0-9]+)`), single},
		// "On success, X is returned" (no article).
		{regexp.MustCompile(`On success(?:,)?\s+(?:the\s+)?(?:newly\s+)?(?:edited\s+|sent\s+|created\s+|updated\s+)?([A-Z][A-Za-z0-9]+)\s+is returned`), single},
		// "Returns X on success" (no article, e.g. "Returns OwnedGifts on success").
		{regexp.MustCompile(`[Rr]eturns ([A-Z][A-Za-z0-9]+) on success`), single},
		// "in form of a X".
		{regexp.MustCompile(`in (?:the )?form of (?:a )?([A-Z][A-Za-z0-9]+)`), single},
	}
}()

// extractReturn pulls the return type from a method's description prose.
//
// Patterns we handle (in priority order):
//
//	"Returns an Array of X" / "On success, an Array of X is returned" → array of named X
//	"an array of X of the sent messages is returned"                  → array of named X
//	"the edited X is returned, otherwise True is returned"            → XOrBool
//	"Returns ... as a X object" / "Returns ... as X object"           → named X
//	"Returns ... as String on success"                                → string
//	"On success, returns a X object" / "Returns a X object"           → named X (indefinite article)
//	"On success, an? X is returned" / "On success, the X is returned" → named X
//	"Returns True" / "On success, true is returned"                   → bool
//	"Returns the verb-ed X"                                           → named X
//	"On success, X is returned"                                       → named X
//	"Returns X on success" (no article)                               → named X
//	"in form of a X"                                                  → named X
//	fallback: bool
//
// Patterns are tried in list order against the whole description; the
// first pattern that matches anywhere in the text decides the result,
// regardless of where in the text it matched.
func extractReturn(desc string) spec.TypeRef {
	// Drop every '*' so emphasis markers in the prose cannot split the
	// phrases the patterns look for.
	d := strings.ReplaceAll(desc, "*", "")
	for _, p := range returnPatterns {
		if m := p.re.FindStringSubmatch(d); m != nil {
			return p.fn(m)
		}
	}
	// Fallback: bool. Better than panic; method-by-method tests would
	// catch any regression.
	return spec.TypeRef{Kind: spec.KindPrimitive, Name: "bool"}
}
// hasFilesParams reports whether at least one parameter's type mentions
// InputFile (the scraper convention triggering multipart/form-data).
// An empty or nil params slice yields false.
func hasFilesParams(params []spec.Field) bool {
	for i := range params {
		if !mentionsInputFile(params[i].Type) {
			continue
		}
		return true
	}
	return false
}
// mentionsInputFile reports whether tr refers to a file-carrying type.
//
// A name counts when it is exactly "InputFile" or starts with
// "InputMedia" or "InputPaidMedia". Named types are checked directly,
// arrays are checked through their element type (an array without an
// element type never counts), and a oneOf counts when any variant does.
// Every other kind yields false.
func mentionsInputFile(tr spec.TypeRef) bool {
	carriesFile := func(name string) bool {
		return name == "InputFile" || strings.HasPrefix(name, "InputMedia") || strings.HasPrefix(name, "InputPaidMedia")
	}

	if tr.Kind == spec.KindNamed {
		return carriesFile(tr.Name)
	}
	if tr.Kind == spec.KindArray {
		return tr.ElemType != nil && mentionsInputFile(*tr.ElemType)
	}
	if tr.Kind == spec.KindOneOf {
		for _, variant := range tr.Variants {
			if carriesFile(variant) {
				return true
			}
		}
	}
	return false
}
// versionRE captures the X.Y or X.Y.Z number that follows "Bot API".
var versionRE = regexp.MustCompile(`Bot API (\d+\.\d+(?:\.\d+)?)`)

// extractVersion returns the first API version found in sections, or ""
// when none is present. The live docs page emits the version as
// "<strong>Bot API X.Y</strong>" inside a paragraph below a date heading;
// the small fixture uses an h4 "Bot API X.Y" instead. To cover both
// shapes, each section's title is checked first and then its description,
// in document order.
func extractVersion(sections []section) string {
	for i := range sections {
		for _, text := range [...]string{sections[i].Title, sections[i].Description} {
			if hit := versionRE.FindStringSubmatch(text); hit != nil {
				return hit[1]
			}
		}
	}
	return ""
}
+76
View File
@@ -0,0 +1,76 @@
package main
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// TestExtractReturn runs extractReturn over a table of description
// phrasings and checks the decoded return type for each.
func TestExtractReturn(t *testing.T) {
	named := func(name string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindNamed, Name: name}
	}
	prim := func(name string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindPrimitive, Name: name}
	}
	arrayOf := func(elem spec.TypeRef) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindArray, ElemType: &elem}
	}

	table := []struct {
		desc string
		want spec.TypeRef
	}{
		{"Returns basic information about the bot in form of a User object.", named("User")},
		{"On success, the sent Message is returned.", named("Message")},
		{"Returns an Array of Update objects.", arrayOf(named("Update"))},
		{"Returns True on success.", prim("bool")},
		{"On success, True is returned.", prim("bool")},
		// Issue 5: "Message or True" conditional return → MessageOrBool sentinel.
		{"On success, if the edited message is not an inline message, the edited Message is returned, otherwise True is returned.", named("MessageOrBool")},
		// Issue 1: new phrasings.
		{"On success, returns a WebhookInfo object.", named("WebhookInfo")},
		{"Returns a UserProfilePhotos object.", named("UserProfilePhotos")},
		{"Returns the uploaded File.", named("File")},
		{"On success, the stopped Poll is returned.", named("Poll")},
		{"On success, an Array of MessageId is returned.", arrayOf(named("MessageId"))},
		{"On success, an array of Message objects that were sent is returned.", arrayOf(named("Message"))},
		// ForwardMessages/CopyMessages shape: "an array of X of the sent messages is returned".
		{"On success, an array of MessageId of the sent messages is returned.", arrayOf(named("MessageId"))},
		// "Returns X on success" (no article) — OwnedGifts, StarAmount, Story, MenuButton, etc.
		{"Returns the gifts received and owned by a managed business account. Returns OwnedGifts on success.", named("OwnedGifts")},
		{"Returns StarAmount on success.", named("StarAmount")},
		{"Posts a story on behalf of a managed business account. Returns Story on success.", named("Story")},
		{"Returns MenuButton on success.", named("MenuButton")},
		// "Returns ... as X object" (no article before type) — ChatInviteLink variants.
		{"Returns the new invite link as ChatInviteLink object.", named("ChatInviteLink")},
		{"Returns the revoked invite link as ChatInviteLink object.", named("ChatInviteLink")},
		// "Returns ... as a X object" (with article) — createForumTopic.
		{"Returns information about the created topic as a ForumTopic object.", named("ForumTopic")},
		// "Returns ... as String on success" — exportChatInviteLink / createInvoiceLink.
		{"Returns the new invite link as String on success.", prim("string")},
		{"Returns the created invoice link as String on success.", prim("string")},
		// "Returns Int on success" — getChatMemberCount.
		{"Returns Int on success.", prim("int64")},
	}
	for _, row := range table {
		require.Equal(t, row.want, extractReturn(row.desc), row.desc)
	}
}
func TestHasFilesParams(t *testing.T) {
require.True(t, hasFilesParams([]spec.Field{
{Type: spec.TypeRef{Kind: spec.KindNamed, Name: "InputFile"}},
}))
require.True(t, hasFilesParams([]spec.Field{
{Type: spec.TypeRef{Kind: spec.KindOneOf, Variants: []string{"InputFile", "string"}}},
}))
require.False(t, hasFilesParams([]spec.Field{
{Type: spec.TypeRef{Kind: spec.KindPrimitive, Name: "string"}},
}))
// Issue 2: Array of InputMedia* union triggers HasFiles.
require.True(t, hasFilesParams([]spec.Field{
{Type: spec.TypeRef{Kind: spec.KindArray, ElemType: &spec.TypeRef{Kind: spec.KindOneOf, Variants: []string{"InputMediaPhoto", "InputMediaVideo"}}}},
}))
}
// TestExtractVersion checks a two-part version in a heading and a
// three-part version in a description.
func TestExtractVersion(t *testing.T) {
	headings := []section{
		{Title: "Recent changes"},
		{Title: "Bot API 7.10"},
		{Title: "Available types"},
	}
	require.Equal(t, "7.10", extractVersion(headings))

	// Issue 4: 3-part version must not be truncated.
	patchRelease := []section{{Description: "Bot API 8.0.1"}}
	require.Equal(t, "8.0.1", extractVersion(patchRelease))
}
+84
View File
@@ -0,0 +1,84 @@
package main
import (
"bytes"
"fmt"
"github.com/goccy/go-json"
"os"
"golang.org/x/net/html"
"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// scrape parses the docs HTML into the IR. The page is split into
// sections; the API version is taken from them, and every section whose
// title passes isMethodTitle becomes a method while every other section
// whose title passes isTypeTitle becomes a type. Sections matching
// neither are ignored. Methods and types keep document order.
func scrape(htmlBytes []byte) (*spec.API, error) {
	root, err := html.Parse(bytes.NewReader(htmlBytes))
	if err != nil {
		return nil, fmt.Errorf("html parse: %w", err)
	}

	blocks := walk(root)
	out := &spec.API{Version: extractVersion(blocks)}
	for _, blk := range blocks {
		if isMethodTitle(blk.Title) {
			out.Methods = append(out.Methods, methodFromSection(blk))
			continue
		}
		if isTypeTitle(blk.Title) {
			out.Types = append(out.Types, typeFromSection(blk))
		}
	}
	return out, nil
}
// typeFromSection builds a type declaration from a docs section. The
// first table, when present, supplies the fields. Otherwise the first
// list, when present, supplies union variant names taken from its links.
// A section with neither yields a declaration with only name and doc.
func typeFromSection(s section) spec.TypeDecl {
	decl := spec.TypeDecl{Name: s.Title, Doc: s.Description}
	switch {
	case len(s.Tables) > 0:
		decl.Fields = parseFieldsTable(s.Tables[0])
	case len(s.Lists) > 0:
		// Union: extract variant names from <li><a>...</a></li>.
		decl.OneOf = extractListLinks(s.Lists[0])
	}
	return decl
}
// methodFromSection builds a method declaration from a docs section. The
// return type is read from the description prose, the parameters from the
// first table (if any), and HasFiles is derived from those parameters.
func methodFromSection(s section) spec.MethodDecl {
	decl := spec.MethodDecl{
		Name:    s.Title,
		Doc:     s.Description,
		Returns: extractReturn(s.Description),
	}
	if len(s.Tables) != 0 {
		decl.Params = parseParamsTable(s.Tables[0])
	}
	decl.HasFiles = hasFilesParams(decl.Params)
	return decl
}
// extractListLinks collects the text of every <a> element beneath ul, in
// document order; each <li><a>X</a></li> contributes "X". Anchors are
// not searched for nested anchors. Used for union variant lists.
func extractListLinks(ul *html.Node) []string {
	var names []string
	// Explicit-stack pre-order walk; children are pushed last-to-first so
	// they are popped in document order.
	pending := []*html.Node{ul}
	for len(pending) > 0 {
		node := pending[len(pending)-1]
		pending = pending[:len(pending)-1]
		if node.Type == html.ElementNode && node.Data == "a" {
			names = append(names, textOf(node))
			continue
		}
		for child := node.LastChild; child != nil; child = child.PrevSibling {
			pending = append(pending, child)
		}
	}
	return names
}
// writeJSON encodes the IR as indented JSON with HTML escaping turned off
// and writes it to path with mode 0644. The document is encoded fully
// into memory first, so an encoding failure leaves path untouched.
// Marshalling is deterministic: types and methods preserve scrape order;
// struct fields use IR-defined order.
func writeJSON(path string, api *spec.API) error {
	out := new(bytes.Buffer)
	encoder := json.NewEncoder(out)
	encoder.SetEscapeHTML(false)
	encoder.SetIndent("", " ")
	err := encoder.Encode(api)
	if err != nil {
		return err
	}
	return os.WriteFile(path, out.Bytes(), 0o644)
}
+36
View File
@@ -0,0 +1,36 @@
package main
import (
"bytes"
"encoding/json"
"flag"
"os"
"testing"
"github.com/stretchr/testify/require"
)
// update, when set with -update, makes the golden test rewrite its
// expected file instead of comparing against it.
var update = flag.Bool("update", false, "update golden files")

// TestScrape_Golden_SmallFixture scrapes the small HTML fixture, encodes
// the result as indented JSON and compares it byte-for-byte with the
// golden file. With -update the golden file is rewritten and the
// comparison is skipped.
func TestScrape_Golden_SmallFixture(t *testing.T) {
	htmlBytes, err := os.ReadFile("../../testdata/html/small_fixture.html")
	require.NoError(t, err)
	api, err := scrape(htmlBytes)
	require.NoError(t, err)

	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetIndent("", " ")
	enc.SetEscapeHTML(false)
	require.NoError(t, enc.Encode(api))

	goldenPath := "../../testdata/golden/api_small_fixture.json"
	if *update {
		require.NoError(t, os.WriteFile(goldenPath, buf.Bytes(), 0o644))
		return
	}
	expected, err := os.ReadFile(goldenPath)
	// -update is a test-binary flag unknown to `go test`, so the package
	// list has to come before it in the suggested command.
	require.NoError(t, err, "missing golden; run `go test ./cmd/scrape/... -update` to create")
	require.Equal(t, string(expected), buf.String())
}
+224
View File
@@ -0,0 +1,224 @@
package main
import (
"strings"
"golang.org/x/net/html"
"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// parseFieldsTable reads the rows of an object-type <table> into fields.
// Columns are taken positionally as Field, Type, Description (other
// column orders are not supported; Telegram's docs use a stable layout).
// The first row is treated as the header and skipped, as is any row with
// fewer than three cells.
//
// A field is required unless its description starts with "Optional.",
// which is the documented convention.
func parseFieldsTable(t *html.Node) []spec.Field {
	rows := tableRows(t)
	if len(rows) == 0 {
		return nil
	}
	cellText := func(n *html.Node) string {
		return strings.TrimSpace(textOf(n))
	}

	var fields []spec.Field
	for i := 1; i < len(rows); i++ { // row 0 is the header
		cells := rowCells(rows[i])
		if len(cells) < 3 {
			continue
		}
		jsonName := cellText(cells[0])
		typeCell := cellText(cells[1])
		doc := cellText(cells[2])
		fields = append(fields, spec.Field{
			Name:     goName(jsonName),
			JSONName: jsonName,
			Type:     parseTypeRef(typeCell),
			Required: !strings.HasPrefix(doc, "Optional."),
			Doc:      doc,
		})
	}
	return fields
}
// parseParamsTable reads the rows of a method <table> into parameters.
// Columns are taken positionally as Parameter, Type, Required,
// Description. The first row is treated as the header and skipped, as is
// any row with fewer than four cells. A parameter is required when its
// Required cell equals "Yes", compared case-insensitively.
func parseParamsTable(t *html.Node) []spec.Field {
	rows := tableRows(t)
	if len(rows) == 0 {
		return nil
	}
	cellText := func(n *html.Node) string {
		return strings.TrimSpace(textOf(n))
	}

	var params []spec.Field
	for i := 1; i < len(rows); i++ { // row 0 is the header
		cells := rowCells(rows[i])
		if len(cells) < 4 {
			continue
		}
		jsonName := cellText(cells[0])
		typeCell := cellText(cells[1])
		isRequired := strings.EqualFold(cellText(cells[2]), "Yes")
		doc := cellText(cells[3])
		params = append(params, spec.Field{
			Name:     goName(jsonName),
			JSONName: jsonName,
			Type:     parseTypeRef(typeCell),
			Required: isRequired,
			Doc:      doc,
		})
	}
	return params
}
// tableRows returns every <tr> found beneath t in document order,
// looking through wrappers such as <thead>/<tbody>. A <tr> is not
// searched for further rows nested inside it.
func tableRows(t *html.Node) []*html.Node {
	var rows []*html.Node
	// Explicit-stack pre-order walk; children are pushed last-to-first so
	// they are popped in document order.
	pending := []*html.Node{t}
	for len(pending) > 0 {
		node := pending[len(pending)-1]
		pending = pending[:len(pending)-1]
		if node.Type == html.ElementNode && node.Data == "tr" {
			rows = append(rows, node)
			continue
		}
		for child := node.LastChild; child != nil; child = child.PrevSibling {
			pending = append(pending, child)
		}
	}
	return rows
}
// rowCells returns the direct <td> and <th> element children of a <tr>,
// in order. Text nodes and other elements are skipped.
func rowCells(tr *html.Node) []*html.Node {
	var cells []*html.Node
	for child := tr.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.ElementNode {
			continue
		}
		switch child.Data {
		case "td", "th":
			cells = append(cells, child)
		}
	}
	return cells
}
// goName converts a snake_case JSON identifier to PascalCase.
//
// The input is split on "_" and empty segments are dropped. A segment
// that is exactly one of the lower-case acronyms id, url, ip, https, json
// or html is written fully upper-cased. Any other segment has its first
// byte upper-cased when that byte is an ASCII lower-case letter and is
// otherwise copied unchanged, so already-capitalised segments survive.
func goName(s string) string {
	var out strings.Builder
	for _, seg := range strings.Split(s, "_") {
		if seg == "" {
			continue
		}
		switch seg {
		case "id", "url", "ip", "https", "json", "html":
			out.WriteString(strings.ToUpper(seg))
			continue
		}
		first := seg[0]
		if 'a' <= first && first <= 'z' {
			first -= 'a' - 'A'
		}
		out.WriteByte(first)
		out.WriteString(seg[1:])
	}
	return out.String()
}
// parseTypeRef decodes the type-cell text into a spec.TypeRef.
//
// Recognised shapes:
//
//	"Integer"                → primitive int64
//	"String"                 → primitive string
//	"Boolean" / "True"       → primitive bool
//	"Float" / "Float number" → primitive float64
//	"Array of X"             → array of (parseTypeRef of X)
//	"Array of Array of X"    → array of array of X
//	"Foo"                    → named Foo
//	"Foo or Bar"             → oneOf {Foo, Bar}
//	"Foo, Bar and Baz"       → oneOf {Foo, Bar, Baz} (Telegram's comma+and union form)
//	"InputFile or String"    → oneOf (caller may translate to InputFile)
//
// The text is trimmed first. The array prefix is checked before either
// union form, and the comma+and form before the "or" form. Union variants
// are stored by resolved name only.
func parseTypeRef(s string) spec.TypeRef {
	s = strings.TrimSpace(s)

	const arrayPrefix = "Array of "
	if strings.HasPrefix(s, arrayPrefix) {
		inner := parseTypeRef(s[len(arrayPrefix):])
		return spec.TypeRef{Kind: spec.KindArray, ElemType: &inner}
	}

	var members []string
	switch {
	case strings.Contains(s, ", ") && strings.Contains(s, " and "):
		// Comma-and union ("X, Y, Z and W") — used by Telegram for ≥3-variant unions.
		members = splitCommaAnd(s)
	case strings.Contains(s, " or "):
		// "X or Y" union (the 2-variant form).
		members = strings.Split(s, " or ")
	default:
		return primitiveOrNamed(s)
	}

	variants := make([]string, 0, len(members))
	for _, member := range members {
		resolved := primitiveOrNamed(strings.TrimSpace(member))
		variants = append(variants, resolved.Name)
	}
	return spec.TypeRef{Kind: spec.KindOneOf, Variants: variants}
}
// splitCommaAnd splits "A, B, C and D" → ["A", "B", "C", "D"].
//
// Every " and " is first rewritten to ", ", then the text is cut on ", ".
// Each piece is trimmed and empty pieces are dropped, so the result is
// always non-nil but may be empty.
func splitCommaAnd(s string) []string {
	pieces := strings.Split(strings.ReplaceAll(s, " and ", ", "), ", ")
	kept := make([]string, 0, len(pieces))
	for i := range pieces {
		trimmed := strings.TrimSpace(pieces[i])
		if trimmed == "" {
			continue
		}
		kept = append(kept, trimmed)
	}
	return kept
}
// primitiveOrNamed maps a single type-cell word to a TypeRef. The known
// spellings of integer, string, boolean and float become primitives
// (int64, string, bool, float64). Anything else, including the empty
// string, is returned as a named type carrying s unchanged.
func primitiveOrNamed(s string) spec.TypeRef {
	var primitive string
	switch s {
	case "Integer", "Int":
		primitive = "int64"
	case "String":
		primitive = "string"
	case "Boolean", "Bool", "True", "False":
		primitive = "bool"
	case "Float", "Float number":
		primitive = "float64"
	}
	if primitive == "" {
		return spec.TypeRef{Kind: spec.KindNamed, Name: s}
	}
	return spec.TypeRef{Kind: spec.KindPrimitive, Name: primitive}
}
+92
View File
@@ -0,0 +1,92 @@
package main
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/lukaszraczylo/go-telegram/internal/spec"
)
// TestGoName checks snake_case → PascalCase conversion, including the
// acronym special cases.
func TestGoName(t *testing.T) {
	table := [][2]string{
		{"chat_id", "ChatID"},
		{"first_name", "FirstName"},
		{"is_bot", "IsBot"},
		{"url", "URL"},
		{"ip_address", "IPAddress"},
		{"language_code", "LanguageCode"},
		// Issue 3: already-uppercase segment must not be corrupted.
		{"webhook_URL", "WebhookURL"},
	}
	for _, row := range table {
		input, want := row[0], row[1]
		require.Equal(t, want, goName(input), input)
	}
}
// TestParseTypeRef checks primitives, named types, nested arrays and
// two-variant unions.
func TestParseTypeRef(t *testing.T) {
	prim := func(name string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindPrimitive, Name: name}
	}
	named := func(name string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindNamed, Name: name}
	}
	arrayOf := func(elem spec.TypeRef) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindArray, ElemType: &elem}
	}
	oneOf := func(variants ...string) spec.TypeRef {
		return spec.TypeRef{Kind: spec.KindOneOf, Variants: variants}
	}

	table := []struct {
		cell string
		want spec.TypeRef
	}{
		{"Integer", prim("int64")},
		{"String", prim("string")},
		{"Boolean", prim("bool")},
		{"Float", prim("float64")},
		{"Message", named("Message")},
		{"Array of Update", arrayOf(named("Update"))},
		{"Array of Array of PhotoSize", arrayOf(arrayOf(named("PhotoSize")))},
		{"Integer or String", oneOf("int64", "string")},
		{"InputFile or String", oneOf("InputFile", "string")},
	}
	for _, row := range table {
		require.Equal(t, row.want, parseTypeRef(row.cell), row.cell)
	}
}
func TestParseFieldsTable_FromFixture(t *testing.T) {
doc := parse(t, "../../testdata/html/small_fixture.html")
sections := walk(doc)
var user *section
for i := range sections {
if sections[i].Title == "User" {
user = &sections[i]
break
}
}
require.NotNil(t, user)
require.Len(t, user.Tables, 1)
fields := parseFieldsTable(user.Tables[0])
require.Len(t, fields, 4)
require.Equal(t, "ID", fields[0].Name)
require.Equal(t, "id", fields[0].JSONName)
require.Equal(t, spec.KindPrimitive, fields[0].Type.Kind)
require.True(t, fields[0].Required)
require.Equal(t, "LastName", fields[3].Name)
require.False(t, fields[3].Required) // "Optional." prefix
}
func TestParseParamsTable_FromFixture(t *testing.T) {
doc := parse(t, "../../testdata/html/small_fixture.html")
sections := walk(doc)
var sm *section
for i := range sections {
if sections[i].Title == "sendMessage" {
sm = &sections[i]
break
}
}
require.NotNil(t, sm)
require.Len(t, sm.Tables, 1)
params := parseParamsTable(sm.Tables[0])
require.Len(t, params, 3)
require.Equal(t, "ChatID", params[0].Name)
require.True(t, params[0].Required)
require.Equal(t, spec.KindOneOf, params[0].Type.Kind)
require.Equal(t, []string{"int64", "string"}, params[0].Type.Variants)
require.Equal(t, "ParseMode", params[2].Name)
require.False(t, params[2].Required) // "Optional"
}
+137
View File
@@ -0,0 +1,137 @@
package main
import (
"strings"
"golang.org/x/net/html"
)
// section is an h4-anchored block of the docs page, as collected by
// walk. Title is the heading text (e.g. "User" or "sendMessage").
// Description is the trimmed text of every <p> element walk visits
// while the section is open, joined with "\n". A section stays open
// until the next h4 or h3 heading, so paragraphs that follow a table
// or list are still appended; paragraphs nested inside a recorded
// table or list are not, because walk does not descend into those.
// Tables and Lists hold raw nodes for later parsing by the
// table/oneof extractors.
type section struct {
Title string
Description string
Tables []*html.Node // <table> nodes
Lists []*html.Node // <ul> nodes (used for oneof variant lists)
}
// walk traverses the parsed page depth-first and returns its
// h4-anchored sections in document order. An h4 closes the section in
// progress (if any) and opens a new one; an h3 closes the section in
// progress without opening another. While a section is open, the text
// of each <p> is appended to its Description and each <table> / <ul>
// node is recorded on it. The traversal never descends into h4, h3, p,
// table or ul elements. Titles containing a space (e.g. "Bot API 7.10")
// are not filtered out here.
func walk(doc *html.Node) []section {
	var (
		out  []section
		cur  section
		open bool // true while cur is an unfinished h4 section
	)
	// flush moves the section in progress, if any, into out.
	flush := func() {
		if open {
			out = append(out, cur)
			open = false
		}
	}

	var visit func(node *html.Node)
	visit = func(node *html.Node) {
		if node.Type == html.ElementNode {
			switch node.Data {
			case "h4":
				flush()
				// The heading text is captured here, so its children
				// are not visited.
				cur, open = section{Title: textOf(node)}, true
				return
			case "h3":
				// h3 (e.g. "Available methods") ends the section in
				// progress but is not a section itself.
				flush()
				return
			case "p":
				if open {
					para := strings.TrimSpace(textOf(node))
					if cur.Description == "" {
						cur.Description = para
					} else {
						cur.Description += "\n" + para
					}
				}
				return
			case "table":
				if open {
					cur.Tables = append(cur.Tables, node)
				}
				return
			case "ul":
				if open {
					cur.Lists = append(cur.Lists, node)
				}
				return
			}
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			visit(child)
		}
	}

	visit(doc)
	flush()
	return out
}
// textOf gathers the data of every text node at or below n, in document
// order, and returns the result after whitespace normalisation by
// collapseWS.
func textOf(n *html.Node) string {
	var text strings.Builder

	var gather func(node *html.Node)
	gather = func(node *html.Node) {
		switch node.Type {
		case html.TextNode:
			text.WriteString(node.Data)
		default:
			for child := node.FirstChild; child != nil; child = child.NextSibling {
				gather(child)
			}
		}
	}
	gather(n)

	return collapseWS(text.String())
}
// collapseWS replaces every run of spaces, tabs, newlines and carriage
// returns in s with a single space, then trims leading and trailing
// whitespace from the result.
func collapseWS(s string) string {
	var out strings.Builder
	inRun := false // true while inside a run of collapsible whitespace
	for _, ch := range s {
		switch ch {
		case ' ', '\t', '\n', '\r':
			// Only the first character of a run emits a space.
			if !inRun {
				out.WriteByte(' ')
			}
			inRun = true
		default:
			out.WriteRune(ch)
			inRun = false
		}
	}
	return strings.TrimSpace(out.String())
}
// isMethodTitle reports whether s looks like a method name: non-empty,
// starting with an ASCII lowercase letter and containing no spaces
// (e.g. "sendMessage"). Rejecting spaces mirrors isTypeTitle, so a
// prose heading that merely begins in lowercase (e.g. "sending files")
// is not mistaken for a method.
func isMethodTitle(s string) bool {
	if s == "" {
		return false
	}
	// Byte comparison is sufficient: only ASCII a-z qualifies.
	if first := s[0]; first < 'a' || first > 'z' {
		return false
	}
	return !strings.Contains(s, " ")
}
// isTypeTitle reports whether s looks like a type name: non-empty,
// starting with an ASCII uppercase letter and containing no spaces
// (e.g. "Message"). A heading such as "Bot API 7.10" is rejected
// because of its spaces.
func isTypeTitle(s string) bool {
	if len(s) == 0 || strings.ContainsRune(s, ' ') {
		return false
	}
	first := s[0]
	return 'A' <= first && first <= 'Z'
}
+69
View File
@@ -0,0 +1,69 @@
package main
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/require"
"golang.org/x/net/html"
)
// parse opens the HTML file at path and returns the root of its parsed
// document. It fails the calling test immediately if the file cannot be
// opened or parsed.
func parse(t *testing.T, path string) *html.Node {
	t.Helper()

	src, openErr := os.Open(path)
	require.NoError(t, openErr)
	defer src.Close()

	root, parseErr := html.Parse(src)
	require.NoError(t, parseErr)
	return root
}
func TestWalk_FixtureSections(t *testing.T) {
doc := parse(t, "../../testdata/html/small_fixture.html")
sections := walk(doc)
titles := make([]string, 0, len(sections))
for _, s := range sections {
titles = append(titles, s.Title)
}
require.Contains(t, titles, "User")
require.Contains(t, titles, "ChatMember")
require.Contains(t, titles, "getMe")
require.Contains(t, titles, "sendMessage")
require.Contains(t, titles, "sendDocument")
require.Contains(t, titles, "getUpdates")
require.Contains(t, titles, "Bot API 7.10")
}
// TestIsMethodTitle checks that lowercase-initial headings are accepted
// as method titles and that type names, the empty string and changelog
// headings are rejected.
func TestIsMethodTitle(t *testing.T) {
	for _, title := range []string{"sendMessage", "getMe"} {
		require.True(t, isMethodTitle(title), title)
	}
	for _, title := range []string{"Message", "", "Bot API 7.10"} {
		require.False(t, isMethodTitle(title), title)
	}
}
// TestIsTypeTitle checks that space-free, uppercase-initial headings are
// accepted as type titles and that method names, changelog headings and
// the empty string are rejected.
func TestIsTypeTitle(t *testing.T) {
	for _, title := range []string{"Message", "ChatMember"} {
		require.True(t, isTypeTitle(title), title)
	}
	for _, title := range []string{"sendMessage", "Bot API 7.10", ""} {
		require.False(t, isTypeTitle(title), title)
	}
}
func TestSection_DescriptionAndTables(t *testing.T) {
doc := parse(t, "../../testdata/html/small_fixture.html")
sections := walk(doc)
var sm *section
for i, s := range sections {
if s.Title == "sendMessage" {
sm = &sections[i]
break
}
}
require.NotNil(t, sm)
require.True(t, strings.Contains(sm.Description, "Use this method to send text messages"))
require.Len(t, sm.Tables, 1)
}