This is the part of the html that I am extracting on the platform and it has the snippet I want to get, the value of the href attribute of the tag with the class "booktitle"
</div>
<div class="elementList" style="padding-top: 10px;">
<div class="left" style="width: 75%;">
<a class="leftAlignedImage" href="/book/show/2784.Ways_of_Seeing" title="Ways of Seeing"><img alt="Ways of Seeing" src="https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1464018308l/2784._SY75_.jpg"/></a>
<a class="bookTitle" href="/book/show/2784.Ways_of_Seeing">Ways of Seeing (Paperback)</a>
<br/>
<span class="by">by</span>
<span itemprop="author" itemscope="" itemtype="http://schema.org/Person">
<div class="authorName__container">
<a class="authorName" href="https://www.goodreads.com/author/show/29919.John_Berger" itemprop="url"><span itemprop="name">John Berger</span></a>
</div>
After logging in using the mechanize library I have this piece of code to try to extract it, but here it returns the name of the book as the code asks, I tried several ways to get only the href value but none worked so far
from bs4 import BeautifulSoup as bs4
from requests import Session
from lxml import html
import Downloader as dw
import requests
def getGenders(browser : mc.Browser, url: str, name: str) -> None:
res = browser.open(url)
aux = res.read()
html2 = bs4(aux, 'html.parser')
with open(name, "w", encoding='utf-8') as file2:
file2.write( str( html2 ) )
getGenders(br, "https://www.goodreads.com/shelf/show/art", "gendersBooks.html")
with open("gendersBooks.html", "r", encoding='utf8') as file:
contents = file.read()
bsObj = bs4(contents, "lxml")
aux = open("books.text", "w", encoding='utf8')
officials = bsObj.find_all('a', {'class' : 'booktitle'})
for text in officials:
print(text.get_text())
aux.write(text.get_text().format())
aux.close()
file.close()