Web Scraping
Extracting Data from HTML with BeautifulSoup
Current weather data
Description
I want to find the current temperature.
![](/images/Current_temperature.png)
Weather
Code
import requests  # Third-party HTTP client (pip install requests)
from bs4 import BeautifulSoup  # Third-party HTML/XML parser (pip install beautifulsoup4)

# Script: download the Moscow weather page, save a prettified copy to
# output.html, then read the current temperature out of that saved copy.

# Address of the web page that shows the current weather for Moscow.
URL = 'https://world-weather.ru/pogoda/russia/moscow/'

# Send an HTTP GET request; `page` is the resulting Response object.
# The timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(URL, timeout=10)
# Stop right away on a 4xx/5xx answer instead of parsing an error page.
page.raise_for_status()

text = page.content  # Raw HTML content of the web page
soup = BeautifulSoup(text, 'html.parser')  # Parse it with the "html.parser" backend

# Save the prettified HTML document to disk (UTF-8 encoded).
with open("output.html", "w", encoding='utf-8') as file:
    file.write(soup.prettify())

# Re-open the saved HTML document and parse it again.
with open("output.html", "rb") as f:
    doc = BeautifulSoup(f, "html.parser")

# The current temperature lives in this div (found by viewing the page
# source with CTRL+U).
tag = doc.find("div", {"id": "weather-now-number"})
if tag is None:
    # find() returns None when nothing matches, e.g. after a site redesign.
    raise SystemExit(
        'Could not find <div id="weather-now-number"> on the page; '
        'the page layout may have changed.'
    )

# Take the element's plain text and drop every whitespace character
# (spaces, tabs, newlines) in a single pass.
temp = "".join(tag.text.split())
print("Current temperature in Moscow = ", temp)