122 lines
2.7 KiB
Go
122 lines
2.7 KiB
Go
package main
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
|
|
|
// Book is one search result scraped from the site.
//
// Field order is significant: encoding/json emits keys in declaration order,
// so reordering fields changes the serialized output.
type Book struct {
	// Title is the trimmed text of the result's title link.
	Title string `json:"title"`
	// Author is the value found next to the "Author:" label, if any.
	Author string `json:"author"`
	// Genres holds the values found next to the "Genre:" label, if any.
	Genres []string `json:"genres"`
	// DownloadURL is the href of the result's title link.
	DownloadURL string `json:"download_url"`
	// CoverURL is the cover image's "data-src" attribute.
	CoverURL string `json:"cover_url"`
	// PublishDate is the trimmed text of the result's time element, kept as
	// the raw string from the page.
	PublishDate string `json:"publish_date"`
}
|
|
|
|
// defaultHTTPTimeout bounds every request made by a parser created with
// NewOceanPDFParser. Without it the zero-value http.Client waits forever on a
// stalled server.
const defaultHTTPTimeout = 30 * time.Second

// OceanPDFParser scrapes search results and download links from a site
// rooted at baseURL, using client for all HTTP requests.
type OceanPDFParser struct {
	// baseURL is the site root without a trailing slash; Search appends
	// "/?s=<query>" to it.
	baseURL string
	// client performs every request; it is created once and reused.
	client *http.Client
}

// NewOceanPDFParser returns a parser for https://oceanofpdf.com whose HTTP
// client gives up on any single request after defaultHTTPTimeout.
func NewOceanPDFParser() *OceanPDFParser {
	return &OceanPDFParser{
		baseURL: "https://oceanofpdf.com",
		client:  &http.Client{Timeout: defaultHTTPTimeout},
	}
}
|
|
|
|
func (p *OceanPDFParser) Search(query string) ([]Book, error) {
|
|
searchURL := fmt.Sprintf("%s/?s=%s", p.baseURL, url.QueryEscape(query))
|
|
|
|
resp, err := p.client.Get(searchURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("search request failed: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse HTML: %v", err)
|
|
}
|
|
|
|
var books []Book
|
|
doc.Find("article.post").Each(func(i int, s *goquery.Selection) {
|
|
book := Book{}
|
|
|
|
// Extract title and download URL
|
|
titleLink := s.Find("h2.entry-title a")
|
|
book.Title = strings.TrimSpace(titleLink.Text())
|
|
book.DownloadURL, _ = titleLink.Attr("href")
|
|
|
|
// Extract author and genres
|
|
metaInfo := s.Find("div.postmetainfo")
|
|
metaInfo.Find("strong").Each(func(i int, meta *goquery.Selection) {
|
|
label := strings.TrimSpace(meta.Text())
|
|
value := strings.TrimSpace(meta.Next().Text())
|
|
|
|
switch label {
|
|
case "Author:":
|
|
book.Author = value
|
|
case "Genre:":
|
|
book.Genres = strings.Split(value, ", ")
|
|
}
|
|
})
|
|
|
|
// Extract cover image URL
|
|
coverImg := s.Find("img.post-image")
|
|
book.CoverURL, _ = coverImg.Attr("data-src")
|
|
|
|
// Extract publish date
|
|
book.PublishDate = strings.TrimSpace(s.Find("time.entry-time").Text())
|
|
|
|
books = append(books, book)
|
|
})
|
|
|
|
return books, nil
|
|
}
|
|
|
|
func (p *OceanPDFParser) GetDownloadLinks(bookURL string) (map[string]string, error) {
|
|
resp, err := p.client.Get(bookURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get book page: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse book page: %v", err)
|
|
}
|
|
|
|
links := make(map[string]string)
|
|
doc.Find("div.download-links a").Each(func(i int, s *goquery.Selection) {
|
|
format := strings.TrimSpace(s.Text())
|
|
href, exists := s.Attr("href")
|
|
if exists {
|
|
links[format] = href
|
|
}
|
|
})
|
|
|
|
return links, nil
|
|
}
|
|
|
|
func main() {
|
|
parser := NewOceanPDFParser()
|
|
|
|
// Example search
|
|
books, err := parser.Search("programming")
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
// Print results as JSON
|
|
output, _ := json.MarshalIndent(books, "", " ")
|
|
fmt.Println(string(output))
|
|
}
|