How to define a parser when using BS4 in Python
#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup

url = "https://www.youtube.com/channel/UCaKt8dvEIPnEHWSbLYhzrxg/videos"
response = requests.get(url)
# parse html
page = str(BeautifulSoup(response.content))

def getURL(page):
    """
    :param page: html of web page (here: Python home page)
    :return: urls in that page
    """
    start_link = page.find("a href")
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    url = page[start_quote + 1: end_quote]
    return url, end_quote

while True:
    url, n = getURL(page)
    page = page[n:]
    if url:
        print(url)
    else:
        break
I am using the above code to get a list of all the YouTube videos on that page. When I run it, I get the following warning:
The code that caused this warning is on line 9 of the file C:/Users/PycharmProjects/ReadCSVFile/venv/Links.py. To get rid of this warning, change code that looks like this:
I did that and started passing 'html', but then a different error came up.
I am using Python 3 with the PyCharm IDE.
Can someone please help me with this?
python-3.x beautifulsoup
asked Nov 11 at 0:54 by NewtoPython
edited Nov 11 at 11:13 by ewwink
1 Answer
It is not an error but a warning: you didn't set a parser, which can be 'html.parser', 'lxml', or 'xml'. Change the line to something like
page = BeautifulSoup(response.content, 'html.parser')
Your code above isn't actually using BeautifulSoup to do the parsing, but here is an example that does.
#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup

def getURL(url):
    """
    :param url: url of web page
    :return: urls in that page
    """
    response = requests.get(url)
    # parse html with an explicit parser to silence the warning
    page = BeautifulSoup(response.content, 'html.parser')
    # collect the href attribute of every <a> tag, skipping tags without one
    link_tags = page.find_all('a')
    urls = [x.get('href') for x in link_tags if x.get('href')]
    return urls

url = "https://www.youtube.com/channel/UCaKt8dvEIPnEHWSbLYhzrxg/videos"
all_url = getURL(url)
print('\n'.join(all_url))
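Since the goal is a list of the videos specifically, a minimal follow-up sketch could filter the collected hrefs before printing. This assumes YouTube video pages live under '/watch' and that the channel page serves those links in the static HTML; the helper name is just for illustration.

# hypothetical helper: keep only video links from the hrefs returned by getURL
def filter_video_urls(urls):
    # video pages look like /watch?v=...; turn relative links into absolute URLs
    video_urls = []
    for href in urls:
        if '/watch' in href:
            if href.startswith('/'):
                href = 'https://www.youtube.com' + href
            video_urls.append(href)
    return video_urls

video_urls = filter_video_urls(all_url)
print('\n'.join(video_urls))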
answered Nov 11 at 11:05 by ewwink