Random tests
This commit is contained in:
66
oceanpdf/emulator/emulator.go
Normal file
66
oceanpdf/emulator/emulator.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package main
|
||||
|
||||
import (
	"context"
	"log"
	"net/url"
	"time"

	"github.com/chromedp/chromedp"
)
|
||||
|
||||
func main() {
|
||||
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
||||
chromedp.Flag("headless", false), // Disable headless mode
|
||||
chromedp.Flag("show-automation", true), // Show automation
|
||||
chromedp.Flag("enable-automation", true), // Enable automation
|
||||
chromedp.Flag("disable-extensions", false), // Enable extensions
|
||||
chromedp.Flag("start-maximized", true), // Start maximized
|
||||
)
|
||||
|
||||
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
|
||||
defer cancel()
|
||||
|
||||
// Create context with logging
|
||||
ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithDebugf(log.Printf))
|
||||
defer cancel()
|
||||
|
||||
// Set timeout
|
||||
ctx, cancel = context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
var htmlContent string
|
||||
// Navigate and perform actions
|
||||
err := chromedp.Run(ctx,
|
||||
chromedp.Navigate("https://oceanofpdf.com"),
|
||||
chromedp.WaitVisible(`input[class="sf_input"]`),
|
||||
chromedp.SendKeys(`input[class="sf_input"]`, "programming"),
|
||||
chromedp.WaitVisible(`button[type="submit"]`),
|
||||
//chromedp.SendKeys(`input[class="sf_input"]`, kb.Enter),
|
||||
chromedp.Click(`button[type="submit"]`),
|
||||
chromedp.Sleep(10*time.Second), // Wait to see results
|
||||
chromedp.OuterHTML("html", &htmlContent),
|
||||
)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
log.Printf("HTML Content: %s", htmlContent)
|
||||
}
|
||||
|
||||
// SearchBooks performs a search with JavaScript rendering
|
||||
func SearchBooks(query string) error {
|
||||
ctx, cancel := chromedp.NewContext(context.Background())
|
||||
defer cancel()
|
||||
|
||||
return chromedp.Run(ctx,
|
||||
// Navigate to search page
|
||||
chromedp.Navigate(`https://oceanofpdf.com/?s=`+query),
|
||||
// Wait for search input to be ready
|
||||
chromedp.WaitVisible(`input[type="search"]`),
|
||||
// Type search query
|
||||
chromedp.SendKeys(`input[type="search"]`, query),
|
||||
// Click search button
|
||||
chromedp.Click(`button[type="submit"]`),
|
||||
// Wait for results
|
||||
chromedp.WaitVisible(`.post`),
|
||||
)
|
||||
}
|
||||
121
oceanpdf/oceanpdf.go
Normal file
121
oceanpdf/oceanpdf.go
Normal file
@@ -0,0 +1,121 @@
|
||||
package main
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
// Book is one search result scraped from an oceanofpdf.com listing page.
type Book struct {
	Title       string   `json:"title"`        // entry title text
	Author      string   `json:"author"`       // from the "Author:" meta row
	Genres      []string `json:"genres"`       // from the "Genre:" meta row, split on ", "
	DownloadURL string   `json:"download_url"` // href of the title link (book detail page)
	CoverURL    string   `json:"cover_url"`    // lazy-loaded image URL (data-src attribute)
	PublishDate string   `json:"publish_date"` // text of the entry's time element
}
|
||||
|
||||
// OceanPDFParser scrapes oceanofpdf.com over plain HTTP (no JS rendering).
type OceanPDFParser struct {
	baseURL string       // site root, e.g. "https://oceanofpdf.com"
	client  *http.Client // HTTP client used for all requests
}
|
||||
|
||||
func NewOceanPDFParser() *OceanPDFParser {
|
||||
return &OceanPDFParser{
|
||||
baseURL: "https://oceanofpdf.com",
|
||||
client: &http.Client{},
|
||||
}
|
||||
}
|
||||
|
||||
func (p *OceanPDFParser) Search(query string) ([]Book, error) {
|
||||
searchURL := fmt.Sprintf("%s/?s=%s", p.baseURL, url.QueryEscape(query))
|
||||
|
||||
resp, err := p.client.Get(searchURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("search request failed: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
var books []Book
|
||||
doc.Find("article.post").Each(func(i int, s *goquery.Selection) {
|
||||
book := Book{}
|
||||
|
||||
// Extract title and download URL
|
||||
titleLink := s.Find("h2.entry-title a")
|
||||
book.Title = strings.TrimSpace(titleLink.Text())
|
||||
book.DownloadURL, _ = titleLink.Attr("href")
|
||||
|
||||
// Extract author and genres
|
||||
metaInfo := s.Find("div.postmetainfo")
|
||||
metaInfo.Find("strong").Each(func(i int, meta *goquery.Selection) {
|
||||
label := strings.TrimSpace(meta.Text())
|
||||
value := strings.TrimSpace(meta.Next().Text())
|
||||
|
||||
switch label {
|
||||
case "Author:":
|
||||
book.Author = value
|
||||
case "Genre:":
|
||||
book.Genres = strings.Split(value, ", ")
|
||||
}
|
||||
})
|
||||
|
||||
// Extract cover image URL
|
||||
coverImg := s.Find("img.post-image")
|
||||
book.CoverURL, _ = coverImg.Attr("data-src")
|
||||
|
||||
// Extract publish date
|
||||
book.PublishDate = strings.TrimSpace(s.Find("time.entry-time").Text())
|
||||
|
||||
books = append(books, book)
|
||||
})
|
||||
|
||||
return books, nil
|
||||
}
|
||||
|
||||
func (p *OceanPDFParser) GetDownloadLinks(bookURL string) (map[string]string, error) {
|
||||
resp, err := p.client.Get(bookURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get book page: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse book page: %v", err)
|
||||
}
|
||||
|
||||
links := make(map[string]string)
|
||||
doc.Find("div.download-links a").Each(func(i int, s *goquery.Selection) {
|
||||
format := strings.TrimSpace(s.Text())
|
||||
href, exists := s.Attr("href")
|
||||
if exists {
|
||||
links[format] = href
|
||||
}
|
||||
})
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
parser := NewOceanPDFParser()
|
||||
|
||||
// Example search
|
||||
books, err := parser.Search("programming")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Print results as JSON
|
||||
output, _ := json.MarshalIndent(books, "", " ")
|
||||
fmt.Println(string(output))
|
||||
}
|
||||
Reference in New Issue
Block a user