Random tests

This commit is contained in:
2025-01-03 19:52:57 +02:00
parent 173a49cb9e
commit 5f494aae09
8 changed files with 1089 additions and 1 deletions

View File

@@ -0,0 +1,66 @@
package main
import (
	"context"
	"log"
	"net/url"
	"time"

	"github.com/chromedp/chromedp"
)
// main drives a visible Chrome session through a search on oceanofpdf.com and
// logs the HTML of the resulting page.
//
// The browser work lives in fetchSearchPageHTML so that its deferred cancel
// functions have already run by the time log.Fatal is reached: log.Fatal
// calls os.Exit, which skips any defers still pending in the calling function.
func main() {
	htmlContent, err := fetchSearchPageHTML()
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("HTML Content: %s", htmlContent)
}

// fetchSearchPageHTML opens https://oceanofpdf.com in a non-headless browser,
// types "programming" into the search box, clicks the submit button, sleeps
// for ten seconds and then returns the outer HTML of the <html> element.
//
// The whole run is bounded by a 30-second timeout. On failure the error from
// chromedp.Run is returned together with an empty string.
func fetchSearchPageHTML() (string, error) {
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("headless", false),           // Disable headless mode
		chromedp.Flag("show-automation", true),     // Show automation
		chromedp.Flag("enable-automation", true),   // Enable automation
		chromedp.Flag("disable-extensions", false), // Enable extensions
		chromedp.Flag("start-maximized", true),     // Start maximized
	)

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancelAlloc()

	// Create context with logging
	browserCtx, cancelBrowser := chromedp.NewContext(allocCtx, chromedp.WithDebugf(log.Printf))
	defer cancelBrowser()

	// Set timeout
	ctx, cancelTimeout := context.WithTimeout(browserCtx, 30*time.Second)
	defer cancelTimeout()

	var htmlContent string

	// Navigate and perform actions
	err := chromedp.Run(ctx,
		chromedp.Navigate("https://oceanofpdf.com"),
		chromedp.WaitVisible(`input[class="sf_input"]`),
		chromedp.SendKeys(`input[class="sf_input"]`, "programming"),
		chromedp.WaitVisible(`button[type="submit"]`),
		// Alternative to the click below, kept for reference:
		//chromedp.SendKeys(`input[class="sf_input"]`, kb.Enter),
		chromedp.Click(`button[type="submit"]`),
		chromedp.Sleep(10*time.Second), // Wait to see results
		chromedp.OuterHTML("html", &htmlContent),
	)
	if err != nil {
		return "", err
	}
	return htmlContent, nil
}
// SearchBooks performs a search with JavaScript rendering.
//
// It opens the oceanofpdf.com results page for query in a new chromedp
// browser context, types query into the search box, clicks the submit button
// and waits until an element matching ".post" is visible.
//
// query is URL-escaped before it is placed in the address, so spaces and
// reserved characters such as '&' or '#' are sent as part of the search term
// instead of altering the URL. The run is bounded by a 30-second timeout so
// that a page which never shows the awaited elements cannot block forever.
//
// The returned error is whatever chromedp.Run reports; it is nil once the
// final wait has succeeded.
func SearchBooks(query string) error {
	browserCtx, cancelBrowser := chromedp.NewContext(context.Background())
	defer cancelBrowser()

	ctx, cancelTimeout := context.WithTimeout(browserCtx, 30*time.Second)
	defer cancelTimeout()

	return chromedp.Run(ctx,
		// Navigate to search page
		chromedp.Navigate(`https://oceanofpdf.com/?s=`+url.QueryEscape(query)),
		// Wait for search input to be ready
		chromedp.WaitVisible(`input[type="search"]`),
		// Type search query
		chromedp.SendKeys(`input[type="search"]`, query),
		// Click search button
		chromedp.Click(`button[type="submit"]`),
		// Wait for results
		chromedp.WaitVisible(`.post`),
	)
}

121
oceanpdf/oceanpdf.go Normal file
View File

@@ -0,0 +1,121 @@
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Book is one entry scraped from a search-results page. It serialises to
// JSON with snake_case keys.
type Book struct {
	// Title is the trimmed text of the entry's heading link.
	Title string `json:"title"`
	// Author is the trimmed text that follows the "Author:" label.
	Author string `json:"author"`
	// Genres is the text that follows the "Genre:" label, split on ", ".
	Genres []string `json:"genres"`
	// DownloadURL is the href of the entry's heading link.
	DownloadURL string `json:"download_url"`
	// CoverURL is the data-src attribute of the entry's cover image.
	CoverURL string `json:"cover_url"`
	// PublishDate is the trimmed text of the entry's <time> element.
	PublishDate string `json:"publish_date"`
}
// OceanPDFParser fetches pages over HTTP and extracts book data from them.
type OceanPDFParser struct {
	// baseURL is the site root that search URLs are built from.
	baseURL string
	// client performs every HTTP request issued by the parser's methods.
	client *http.Client
}
// NewOceanPDFParser returns a parser whose base URL is
// https://oceanofpdf.com and which sends its requests through a freshly
// created http.Client.
//
// NOTE(review): the client is created without a Timeout, so a request to an
// unresponsive server is not bounded in time; consider setting one.
func NewOceanPDFParser() *OceanPDFParser {
	parser := new(OceanPDFParser)
	parser.baseURL = "https://oceanofpdf.com"
	parser.client = &http.Client{}
	return parser
}
// Search requests the site's search page for query and returns one Book per
// "article.post" element found in the response.
//
// query is URL-escaped before it is placed in the "s" parameter. An error is
// returned when the request fails, when the server answers with a status
// other than 200 OK, or when the body cannot be parsed as HTML. Underlying
// errors are wrapped with %w so callers can inspect them with errors.Is and
// errors.As. A page without matching articles yields a nil slice and a nil
// error.
func (p *OceanPDFParser) Search(query string) ([]Book, error) {
	searchURL := fmt.Sprintf("%s/?s=%s", p.baseURL, url.QueryEscape(query))

	resp, err := p.client.Get(searchURL)
	if err != nil {
		return nil, fmt.Errorf("search request failed: %w", err)
	}
	defer resp.Body.Close()

	// Without this check an error page would be parsed like a results page
	// and reported as an empty but successful search.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("search request failed: unexpected status %s", resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse HTML: %w", err)
	}

	var books []Book
	doc.Find("article.post").Each(func(_ int, article *goquery.Selection) {
		books = append(books, bookFromArticle(article))
	})
	return books, nil
}

// bookFromArticle builds a Book from a single search-result article element.
func bookFromArticle(article *goquery.Selection) Book {
	var book Book

	// Extract title and download URL; both come from the heading link.
	// A missing href simply leaves DownloadURL empty.
	titleLink := article.Find("h2.entry-title a")
	book.Title = strings.TrimSpace(titleLink.Text())
	book.DownloadURL, _ = titleLink.Attr("href")

	// Extract author and genres: each <strong> holds a label and the
	// element right after it holds the matching value.
	article.Find("div.postmetainfo").Find("strong").Each(func(_ int, meta *goquery.Selection) {
		label := strings.TrimSpace(meta.Text())
		value := strings.TrimSpace(meta.Next().Text())
		switch label {
		case "Author:":
			book.Author = value
		case "Genre:":
			// strings.Split("", ", ") returns [""]; leave Genres nil
			// rather than recording a single empty genre.
			if value != "" {
				book.Genres = strings.Split(value, ", ")
			}
		}
	})

	// Extract cover image URL; a missing data-src leaves CoverURL empty.
	book.CoverURL, _ = article.Find("img.post-image").Attr("data-src")

	// Extract publish date
	book.PublishDate = strings.TrimSpace(article.Find("time.entry-time").Text())

	return book
}
// GetDownloadLinks fetches bookURL and returns the links found inside
// "div.download-links", keyed by each anchor's trimmed text.
//
// Anchors without an href attribute are skipped. When two anchors share the
// same text, the later one replaces the earlier entry. An error is returned
// when the request fails, when the server answers with a status other than
// 200 OK, or when the body cannot be parsed as HTML. Underlying errors are
// wrapped with %w so callers can inspect them with errors.Is and errors.As.
func (p *OceanPDFParser) GetDownloadLinks(bookURL string) (map[string]string, error) {
	resp, err := p.client.Get(bookURL)
	if err != nil {
		return nil, fmt.Errorf("failed to get book page: %w", err)
	}
	defer resp.Body.Close()

	// Without this check an error page would be parsed like a book page and
	// reported as a book that has no download links.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to get book page: unexpected status %s", resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse book page: %w", err)
	}

	links := make(map[string]string)
	doc.Find("div.download-links a").Each(func(_ int, anchor *goquery.Selection) {
		href, ok := anchor.Attr("href")
		if !ok {
			return
		}
		links[strings.TrimSpace(anchor.Text())] = href
	})
	return links, nil
}
// main runs an example search for "programming" and prints the resulting
// books to standard output as indented JSON. Any failure, whether from the
// search or from JSON encoding, is logged and terminates the program.
func main() {
	parser := NewOceanPDFParser()

	// Example search
	books, err := parser.Search("programming")
	if err != nil {
		log.Fatal(err)
	}

	// Print results as JSON
	output, err := json.MarshalIndent(books, "", " ")
	if err != nil {
		// Previously ignored; an encoding failure would have printed an
		// empty line and exited successfully.
		log.Fatal(err)
	}
	fmt.Println(string(output))
}