blob: 2052e683c970e5795b1ce6bed384fe66f68cc885 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
import sys
from bs4 import BeautifulSoup
# Rewrite the HTML file named by the first command-line argument in place,
# adding target="_self" to every in-page anchor link (href starting with "#")
# that does not already carry a "tabindex" or "target" attribute.

# Read the whole document; the context manager closes the handle even if
# reading fails.
with open(sys.argv[1], mode="r") as html_doc:
    html = html_doc.read()

soup = BeautifulSoup(html, features="lxml")

for link in soup.find_all("a"):
    attrs = link.attrs
    # Anchors without an href are not links and are left alone.
    if "href" not in attrs:
        continue
    if (link["href"].startswith("#")
            and "tabindex" not in attrs
            and "target" not in attrs):
        link["target"] = "_self"

# Overwrite the same file with the modified tree. Leaving the `with` block
# flushes and closes the file, so the confirmation below is only printed
# once the data has actually been handed to the OS.
with open(sys.argv[1], mode="w") as html_doc:
    print(soup, file=html_doc)
print("File written")
|