// torznabtoweb/oceanpdf/oceanpdf.go

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// Book is one search result scraped from a listing page. It is serialized to
// JSON using the snake_case keys given in the field tags.
//
// Fields, as populated by Search:
//   - Title: whitespace-trimmed text of the result's "h2.entry-title a" link.
//   - Author: value following the "Author:" label in the result's meta block.
//   - Genres: the comma-separated values following the "Genre:" label.
//   - DownloadURL: href of the title link; empty when the attribute is absent.
//     NOTE(review): this looks like the book's detail page rather than a
//     direct file link — confirm against GetDownloadLinks callers.
//   - CoverURL: "data-src" attribute of the result's "img.post-image" element;
//     empty when the attribute is absent.
//   - PublishDate: whitespace-trimmed text of "time.entry-time", kept as the
//     raw string shown on the page (it is not parsed into a time value).
type Book struct {
	Title       string   `json:"title"`
	Author      string   `json:"author"`
	Genres      []string `json:"genres"`
	DownloadURL string   `json:"download_url"`
	CoverURL    string   `json:"cover_url"`
	PublishDate string   `json:"publish_date"`
}

// defaultHTTPTimeout bounds each HTTP request made by an OceanPDFParser,
// covering connection setup, redirects and reading the response body. Without
// it the zero-value http.Client waits forever on a stalled server.
const defaultHTTPTimeout = 30 * time.Second

// OceanPDFParser fetches pages from a single site and extracts book data from
// their HTML. Construct it with NewOceanPDFParser; the zero value has a nil
// HTTP client and is not usable.
type OceanPDFParser struct {
	// baseURL is the scheme and host that search URLs are built from,
	// without a trailing slash.
	baseURL string
	// client performs every request issued by the parser's methods.
	client *http.Client
}

// NewOceanPDFParser returns a parser that targets https://oceanofpdf.com and
// uses an HTTP client whose requests time out after defaultHTTPTimeout.
func NewOceanPDFParser() *OceanPDFParser {
	return &OceanPDFParser{
		baseURL: "https://oceanofpdf.com",
		client:  &http.Client{Timeout: defaultHTTPTimeout},
	}
}

// Search requests the site's search page for query and returns one Book for
// every "article.post" element in the response, in document order.
//
// The query is URL-escaped before being placed in the "s" parameter. An error
// is returned when the request fails, when the server answers with a status
// other than 200 OK, or when the body cannot be parsed as HTML. A page that
// contains no matching articles yields a nil slice and a nil error.
func (p *OceanPDFParser) Search(query string) ([]Book, error) {
	searchURL := fmt.Sprintf("%s/?s=%s", p.baseURL, url.QueryEscape(query))

	resp, err := p.client.Get(searchURL)
	if err != nil {
		return nil, fmt.Errorf("search request failed: %w", err)
	}
	defer resp.Body.Close()

	// An error page is still HTML; without this check it would be parsed
	// successfully and reported as "no results".
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("search request failed: unexpected status %s", resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse HTML: %w", err)
	}

	var books []Book
	doc.Find("article.post").Each(func(_ int, s *goquery.Selection) {
		books = append(books, parseSearchResult(s))
	})

	return books, nil
}

// parseSearchResult builds a Book from a single "article.post" selection.
// Elements or attributes missing from the markup leave the matching field at
// its zero value.
func parseSearchResult(s *goquery.Selection) Book {
	var book Book

	// The title link carries both the display title and the result's URL.
	// The "exists" result of Attr is ignored on purpose: a missing href simply
	// leaves DownloadURL empty.
	titleLink := s.Find("h2.entry-title a")
	book.Title = strings.TrimSpace(titleLink.Text())
	book.DownloadURL, _ = titleLink.Attr("href")

	// Each <strong> in the meta block is a label; its value is the text of the
	// element that immediately follows it.
	s.Find("div.postmetainfo").Find("strong").Each(func(_ int, meta *goquery.Selection) {
		label := strings.TrimSpace(meta.Text())
		value := strings.TrimSpace(meta.Next().Text())

		switch label {
		case "Author:":
			book.Author = value
		case "Genre:":
			book.Genres = splitGenres(value)
		}
	})

	// The cover URL is read from "data-src" rather than "src"; a missing
	// attribute leaves CoverURL empty.
	book.CoverURL, _ = s.Find("img.post-image").Attr("data-src")

	book.PublishDate = strings.TrimSpace(s.Find("time.entry-time").Text())

	return book
}

// splitGenres splits a comma-separated genre list into trimmed, non-empty
// entries. It returns nil when value contains no genre at all.
func splitGenres(value string) []string {
	var genres []string
	for _, g := range strings.Split(value, ",") {
		if g = strings.TrimSpace(g); g != "" {
			genres = append(genres, g)
		}
	}
	return genres
}

// GetDownloadLinks fetches the page at bookURL and returns the links found
// inside "div.download-links", keyed by each anchor's whitespace-trimmed text.
//
// Anchors without an href attribute are skipped. When two anchors share the
// same text, the later one in document order wins. An error is returned when
// the request fails, when the server answers with a status other than 200 OK,
// or when the body cannot be parsed as HTML. A page with no matching anchors
// yields an empty, non-nil map.
func (p *OceanPDFParser) GetDownloadLinks(bookURL string) (map[string]string, error) {
	resp, err := p.client.Get(bookURL)
	if err != nil {
		return nil, fmt.Errorf("failed to get book page: %w", err)
	}
	defer resp.Body.Close()

	// An error page is still HTML; without this check it would be parsed
	// successfully and reported as "no links".
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to get book page: unexpected status %s", resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse book page: %w", err)
	}

	links := make(map[string]string)
	doc.Find("div.download-links a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok {
			return
		}
		links[strings.TrimSpace(s.Text())] = href
	})

	return links, nil
}

// main runs a sample search for "programming" and prints the results to
// standard output as indented JSON. Any failure is logged and terminates the
// program with a non-zero exit status.
func main() {
	parser := NewOceanPDFParser()

	// Example search
	books, err := parser.Search("programming")
	if err != nil {
		log.Fatal(err)
	}

	// Print results as JSON
	output, err := json.MarshalIndent(books, "", "  ")
	if err != nil {
		log.Fatalf("encoding results: %v", err)
	}
	fmt.Println(string(output))
}