Random tests

2025-01-03 19:52:57 +02:00
parent 173a49cb9e
commit 5f494aae09
8 changed files with 1089 additions and 1 deletions
--- a/oceanpdf/emulator/emulator.go
+++ b/oceanpdf/emulator/emulator.go
@@ -0,0 +1,66 @@
+package main
+
+import (
+	"context"
+	"log"
+	"net/url"
+	"time"
+
+	"github.com/chromedp/chromedp"
+)
+
+// main runs the browser session and exits with a non-zero status if it fails.
+func main() {
+	// The session lives in runEmulator so that its deferred cancel functions
+	// have already run by the time log.Fatal calls os.Exit (which skips
+	// deferred calls in the exiting function).
+	if err := runEmulator(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// runEmulator starts a non-headless Chrome via chromedp, submits the search
+// term "programming" through the form on https://oceanofpdf.com, sleeps for
+// ten seconds, and logs the page's outer HTML. The whole session is bounded
+// by a 30-second timeout. It returns the error reported by chromedp.Run.
+func runEmulator() error {
+	opts := append(chromedp.DefaultExecAllocatorOptions[:],
+		chromedp.Flag("headless", false),           // Disable headless mode
+		chromedp.Flag("show-automation", true),     // Show automation
+		chromedp.Flag("enable-automation", true),   // Enable automation
+		chromedp.Flag("disable-extensions", false), // Enable extensions
+		chromedp.Flag("start-maximized", true),     // Start maximized
+	)
+
+	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
+	defer cancelAlloc()
+
+	// Create context with logging
+	browserCtx, cancelBrowser := chromedp.NewContext(allocCtx, chromedp.WithDebugf(log.Printf))
+	defer cancelBrowser()
+
+	// Set timeout
+	ctx, cancelTimeout := context.WithTimeout(browserCtx, 30*time.Second)
+	defer cancelTimeout()
+
+	var htmlContent string
+	// Navigate and perform actions
+	err := chromedp.Run(ctx,
+		chromedp.Navigate("https://oceanofpdf.com"),
+		chromedp.WaitVisible(`input[class="sf_input"]`),
+		chromedp.SendKeys(`input[class="sf_input"]`, "programming"),
+		chromedp.WaitVisible(`button[type="submit"]`),
+		//chromedp.SendKeys(`input[class="sf_input"]`, kb.Enter),
+		chromedp.Click(`button[type="submit"]`),
+		chromedp.Sleep(10*time.Second), // Wait to see results
+		chromedp.OuterHTML("html", &htmlContent),
+	)
+	if err != nil {
+		return err
+	}
+
+	log.Printf("HTML Content: %s", htmlContent)
+	return nil
+}
+
+// SearchBooks opens the oceanofpdf.com search URL for query in a new chromedp
+// browser context, types the query into the page's search input, clicks the
+// submit button, and waits until an element matching ".post" is visible.
+//
+// The query is URL-escaped before being placed in the address. The context is
+// derived from context.Background() and carries no timeout. It returns the
+// error reported by chromedp.Run.
+func SearchBooks(query string) error {
+	ctx, cancel := chromedp.NewContext(context.Background())
+	defer cancel()
+
+	// Escape the query so spaces, '&', '#', '+' and similar characters are
+	// sent as part of the search term instead of altering the URL structure.
+	searchURL := `https://oceanofpdf.com/?s=` + url.QueryEscape(query)
+
+	return chromedp.Run(ctx,
+		// Navigate to search page
+		chromedp.Navigate(searchURL),
+		// Wait for search input to be ready
+		chromedp.WaitVisible(`input[type="search"]`),
+		// Type search query
+		chromedp.SendKeys(`input[type="search"]`, query),
+		// Click search button
+		chromedp.Click(`button[type="submit"]`),
+		// Wait for results
+		chromedp.WaitVisible(`.post`),
+	)
+}
--- a/oceanpdf/oceanpdf.go
+++ b/oceanpdf/oceanpdf.go
@@ -0,0 +1,121 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+type Book struct { // Book is one search result as filled in by Search; JSON keys come from the field tags.
+	Title       string   `json:"title"` // trimmed text of the result's "h2.entry-title a" link
+	Author      string   `json:"author"` // value found after the "Author:" label in the result's meta info
+	Genres      []string `json:"genres"` // value found after the "Genre:" label, split on ", "
+	DownloadURL string   `json:"download_url"` // href of the result's title link
+	CoverURL    string   `json:"cover_url"` // "data-src" attribute of the result's "img.post-image"
+	PublishDate string   `json:"publish_date"` // trimmed text of "time.entry-time", kept as an unparsed string
+}
+
+type OceanPDFParser struct { // OceanPDFParser fetches pages over HTTP and extracts book data from their HTML.
+	baseURL string // site root that Search prefixes to the "/?s=" query path
+	client  *http.Client // HTTP client used for every page request
+}
+
+// NewOceanPDFParser returns a parser pointed at https://oceanofpdf.com that
+// issues its requests through a zero-value http.Client.
+func NewOceanPDFParser() *OceanPDFParser {
+	parser := new(OceanPDFParser)
+	parser.baseURL = "https://oceanofpdf.com"
+	parser.client = &http.Client{}
+	return parser
+}
+
+// Search requests "<baseURL>/?s=<query>" (query URL-escaped) and builds one
+// Book per "article.post" element found in the returned HTML.
+//
+// It returns an error if the request fails, if the response status is not
+// 200 OK, or if the body cannot be parsed as HTML. Fields whose elements or
+// attributes are missing from a result are left at their zero value.
+func (p *OceanPDFParser) Search(query string) ([]Book, error) {
+	searchURL := fmt.Sprintf("%s/?s=%s", p.baseURL, url.QueryEscape(query))
+
+	resp, err := p.client.Get(searchURL)
+	if err != nil {
+		return nil, fmt.Errorf("search request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Get only reports transport-level failures; without this check an error
+	// page would be parsed and silently produce an empty result list.
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("search request failed: unexpected status %s", resp.Status)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse HTML: %w", err)
+	}
+
+	var books []Book
+	doc.Find("article.post").Each(func(_ int, s *goquery.Selection) {
+		book := Book{}
+
+		// Extract title and download URL; a missing href leaves DownloadURL empty.
+		titleLink := s.Find("h2.entry-title a")
+		book.Title = strings.TrimSpace(titleLink.Text())
+		book.DownloadURL, _ = titleLink.Attr("href")
+
+		// Extract author and genres: each <strong> is treated as a label and
+		// the element that follows it as the corresponding value.
+		metaInfo := s.Find("div.postmetainfo")
+		metaInfo.Find("strong").Each(func(_ int, meta *goquery.Selection) {
+			label := strings.TrimSpace(meta.Text())
+			value := strings.TrimSpace(meta.Next().Text())
+
+			switch label {
+			case "Author:":
+				book.Author = value
+			case "Genre:":
+				// Splitting "" would yield a single empty genre, so skip it.
+				if value != "" {
+					book.Genres = strings.Split(value, ", ")
+				}
+			}
+		})
+
+		// Extract cover image URL; a missing data-src leaves CoverURL empty.
+		coverImg := s.Find("img.post-image")
+		book.CoverURL, _ = coverImg.Attr("data-src")
+
+		// Extract publish date
+		book.PublishDate = strings.TrimSpace(s.Find("time.entry-time").Text())
+
+		books = append(books, book)
+	})
+
+	return books, nil
+}
+
+// GetDownloadLinks fetches bookURL and returns a map from the trimmed text of
+// each "div.download-links a" anchor to its href. Anchors without an href are
+// skipped; if two anchors share the same text, the later one wins.
+//
+// It returns an error if the request fails, if the response status is not
+// 200 OK, or if the body cannot be parsed as HTML.
+func (p *OceanPDFParser) GetDownloadLinks(bookURL string) (map[string]string, error) {
+	resp, err := p.client.Get(bookURL)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get book page: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Get only reports transport-level failures; without this check an error
+	// page would be parsed and silently produce an empty map.
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("failed to get book page: unexpected status %s", resp.Status)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse book page: %w", err)
+	}
+
+	links := make(map[string]string)
+	doc.Find("div.download-links a").Each(func(_ int, s *goquery.Selection) {
+		format := strings.TrimSpace(s.Text())
+		if href, exists := s.Attr("href"); exists {
+			links[format] = href
+		}
+	})
+
+	return links, nil
+}
+
+// main searches for "programming" and prints the results to standard output
+// as indented JSON. Any search or encoding error is logged and ends the
+// program with a non-zero status.
+func main() {
+	parser := NewOceanPDFParser()
+
+	// Example search
+	books, err := parser.Search("programming")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Print results as JSON
+	output, err := json.MarshalIndent(books, "", "  ")
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(string(output))
+}