// torznabtoweb/oceanpdf/oceanpdf.go
// 2025-01-03 19:52:57 +02:00
//
// 122 lines
// 2.7 KiB
// Go

package main
import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// Book is one search result as scraped by OceanPDFParser.Search. The JSON
// tags fix the key names used when a Book is serialized.
type Book struct {
	// Title is the whitespace-trimmed text of the result's title link.
	Title string `json:"title"`

	// Author is the value found next to the "Author:" label, if any.
	Author string `json:"author"`

	// Genres holds the entries found next to the "Genre:" label, if any.
	Genres []string `json:"genres"`

	// DownloadURL is the href of the result's title link.
	DownloadURL string `json:"download_url"`

	// CoverURL is the data-src attribute of the result's cover image.
	CoverURL string `json:"cover_url"`

	// PublishDate is the text of the result's time element, kept as a string.
	PublishDate string `json:"publish_date"`
}
// OceanPDFParser fetches pages over HTTP and extracts book data from their
// HTML. Build one with NewOceanPDFParser.
type OceanPDFParser struct {
	// baseURL is the site root that search URLs are built from.
	baseURL string

	// client issues every HTTP request made by the parser.
	client *http.Client
}
// NewOceanPDFParser returns a parser that targets https://oceanofpdf.com.
//
// The HTTP client is given an overall per-request timeout: a zero-valued
// http.Client never times out, so a stalled server would otherwise block
// Search and GetDownloadLinks indefinitely.
func NewOceanPDFParser() *OceanPDFParser {
	return &OceanPDFParser{
		baseURL: "https://oceanofpdf.com",
		client:  &http.Client{Timeout: 30 * time.Second},
	}
}
// Search requests the site's search page for query and returns one Book per
// "article.post" element found in the response.
//
// query is URL-escaped before being placed in the "s" parameter. An error is
// returned when the request fails, when the server answers with a status
// other than 200 OK, or when the body cannot be parsed as HTML. Underlying
// errors are wrapped so callers can inspect them with errors.Is / errors.As.
// The returned slice is nil when the page contains no results.
func (p *OceanPDFParser) Search(query string) ([]Book, error) {
	searchURL := fmt.Sprintf("%s/?s=%s", p.baseURL, url.QueryEscape(query))

	resp, err := p.client.Get(searchURL)
	if err != nil {
		return nil, fmt.Errorf("search request failed: %w", err)
	}
	defer resp.Body.Close()

	// Error pages are HTML too: without this check they would parse cleanly
	// and be reported as a successful search with zero results.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("search request failed: unexpected status %s", resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse HTML: %w", err)
	}

	var books []Book
	doc.Find("article.post").Each(func(_ int, s *goquery.Selection) {
		var book Book

		// The title link supplies both the title text and the book URL.
		// Attr's second result only reports presence; a missing attribute
		// simply leaves the field empty.
		titleLink := s.Find("h2.entry-title a")
		book.Title = strings.TrimSpace(titleLink.Text())
		book.DownloadURL, _ = titleLink.Attr("href")

		// Metadata is laid out as <strong>Label:</strong> followed by a
		// sibling element holding the value.
		s.Find("div.postmetainfo").Find("strong").Each(func(_ int, meta *goquery.Selection) {
			label := strings.TrimSpace(meta.Text())
			value := strings.TrimSpace(meta.Next().Text())

			switch label {
			case "Author:":
				book.Author = value
			case "Genre:":
				// Split on the bare comma and trim each piece so spacing
				// variations don't leak into the values, and drop empty
				// pieces so an empty value doesn't become [""].
				var genres []string
				for _, g := range strings.Split(value, ",") {
					if g = strings.TrimSpace(g); g != "" {
						genres = append(genres, g)
					}
				}
				book.Genres = genres
			}
		})

		// The cover URL is read from data-src rather than src.
		book.CoverURL, _ = s.Find("img.post-image").Attr("data-src")

		book.PublishDate = strings.TrimSpace(s.Find("time.entry-time").Text())

		books = append(books, book)
	})

	return books, nil
}
// GetDownloadLinks fetches the page at bookURL and returns the links found
// under "div.download-links", keyed by each link's trimmed text.
//
// Anchors without an href attribute are skipped. If several anchors share
// the same text, the last one wins because they map to the same key. An
// error is returned when the request fails, when the server answers with a
// status other than 200 OK, or when the body cannot be parsed as HTML.
// Underlying errors are wrapped so callers can inspect them with errors.Is /
// errors.As.
func (p *OceanPDFParser) GetDownloadLinks(bookURL string) (map[string]string, error) {
	resp, err := p.client.Get(bookURL)
	if err != nil {
		return nil, fmt.Errorf("failed to get book page: %w", err)
	}
	defer resp.Body.Close()

	// Error pages are HTML too: without this check they would parse cleanly
	// and be reported as a book page that has no download links.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to get book page: unexpected status %s", resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse book page: %w", err)
	}

	links := make(map[string]string)
	doc.Find("div.download-links a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok {
			return
		}
		links[strings.TrimSpace(s.Text())] = href
	})

	return links, nil
}
// main runs an example search for "programming" and prints the results to
// standard output as indented JSON. A failed search or a failed encoding is
// reported through log.Fatal / log.Fatalf, which terminates the program.
func main() {
	parser := NewOceanPDFParser()

	// Example search
	books, err := parser.Search("programming")
	if err != nil {
		log.Fatal(err)
	}

	// Print results as JSON. The encoding error is checked rather than
	// discarded so a failure cannot silently print an empty line.
	output, err := json.MarshalIndent(books, "", " ")
	if err != nil {
		log.Fatalf("encoding results: %v", err)
	}
	fmt.Println(string(output))
}