vt-cs-projects / web_scraper / imdb_web_scraper.ipynb
imdb_web_scraper.ipynb
Raw

Enter your Name: Jordan Harrington

Enter your PID: jordanha23

I have neither given nor received unauthorized assistance on this assignment. See the course syllabus for details on the Honor Code policy. In particular, sharing lines of solution code is prohibited.

import requests
import bs4
import pandas as pd
import time
# Seconds to wait for IMDb before giving up on a request; without a timeout a
# stalled connection would block the whole scrape indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _text_or_default(tag, default):
    """Return ``tag.text`` when *tag* was found, otherwise *default*."""
    return tag.text if tag is not None else default


def get_data(movieCount):
    """Scrape one IMDb comedy search-results page and each listed title's page.

    Args:
        movieCount: Value placed in the ``start`` query parameter of the
            search URL (the index of the first result on the page).

    Returns:
        A list with one 8-item list per title, in the order
        ``[name, url, rating, director, author, date, helpful, text]``.
        Missing values are replaced by the sentinels ``"unknown-movie"``,
        ``-1.0``, ``"unknown-director"``, ``"unknown-author"``,
        ``"unknown-date"``, ``0`` and ``"unknown"`` respectively.  ``helpful``
        is the raw label string when present; ``text`` is the review's HTML
        as a string.

    Raises:
        requests.exceptions.RequestException: if a request fails or times out.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0", "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"}
    url = 'https://www.imdb.com/search/title/?genres=comedy&start=' + str(movieCount) + '&explore=title_type,genres&ref_=adv_nxt'
    r = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = bs4.BeautifulSoup(r.content, 'html.parser')

    ret = []

    for item in soup.find_all('div', attrs={'class': 'lister-item-content'}):
        rating = item.find('strong')

        header = item.find('h3', attrs={'class': 'lister-item-header'})
        link = header.find('a') if header is not None else None
        if link is None or not link.get('href'):
            # Without a title link there is no detail page to fetch.
            continue

        # Read the attribute and text through the parsed tag instead of
        # splitting its serialized HTML, which breaks on entities/nested tags.
        movie_url = 'https://www.imdb.com' + link['href']
        name = link.get_text(strip=True) or "unknown-movie"

        movie_r = requests.get(movie_url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
        movie_soup = bs4.BeautifulSoup(movie_r.content, 'html.parser')

        director = movie_soup.find('a', attrs={'class': 'ipc-metadata-list-item__list-content-item ipc-metadata-list-item__list-content-item--link'})
        author = movie_soup.find('a', attrs={'class': 'ipc-link ipc-link--base', 'data-testid': 'author-link'})
        date = movie_soup.find('li', attrs={'class': 'ipc-inline-list__item review-date'})
        helpful = movie_soup.find('span', attrs={'class': 'ipc-voting__label__count ipc-voting__label__count--up'})
        review_box = movie_soup.find('div', attrs={'class': 'styles__MetaDataContainer-sc-12uhu9s-0 cgqHBf'})

        # -1.0 marks "no rating"; also used when the label is not numeric.
        rating_value = -1.0
        if rating is not None:
            try:
                rating_value = float(rating.text)
            except ValueError:
                pass

        review_body = None
        if review_box is not None:
            review_body = review_box.find('div', attrs={'class': 'ipc-html-content ipc-html-content--base'})
        # Fall back to "unknown" when either container is missing, instead of
        # storing the literal string "None".
        text = str(review_body) if review_body is not None else "unknown"

        row = [
            name,
            movie_url,
            rating_value,
            _text_or_default(director, "unknown-director"),
            _text_or_default(author, "unknown-author"),
            _text_or_default(date, "unknown-date"),
            # Kept as the raw label (e.g. "1.2K") or int 0; a later cell
            # converts this column to integers.
            _text_or_default(helpful, 0),
            text,
        ]

        # Everything needed has been copied into plain values; free the tree.
        movie_soup.decompose()

        ret.append(row)
        print(str(len(ret)) + ' movie(s)')

    soup.decompose()
    return ret
# Paging parameters for the scrape.
# Each results page yields 50 titles (see the "50 movie(s)" progress lines),
# so the `start` index must advance by 50; stepping by 51 skipped one title
# between consecutive pages.
PAGE_SIZE = 50
# NOTE(review): starting at 51 skips the first 50 results; use 1 if the first
# results page should be included — confirm the intent.
FIRST_START = 51
TOTAL_PAGES = 20
BREAK_AFTER_PAGES = 10   # pause once, after this many pages
BREAK_SECONDS = 10

results = []  # one list of rows per scraped page
x = FIRST_START

for i in range(TOTAL_PAGES):
    if i == BREAK_AFTER_PAGES:
        print('taking a break...')
        time.sleep(BREAK_SECONDS)  # Take a break when halfway done

    results.append(get_data(x))
    print(str(i + 1) + ' page(s) done')
    x += PAGE_SIZE


def flatten(nested):
    """Concatenate a list of lists into one flat list (one level deep)."""
    return [item for sublist in nested for item in sublist]


df = pd.DataFrame(flatten(results), columns=['Movie Name', 'URL', 'Rating', 'Director', 'Author', 'Date', 'Helpful', 'Text'])
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
1 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
2 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
3 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
4 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
5 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
6 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
7 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
8 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
9 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
10 page(s) done
taking a break...
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
11 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
12 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
13 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
14 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
15 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
16 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
17 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
18 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
19 page(s) done
1 movie(s)
2 movie(s)
3 movie(s)
4 movie(s)
5 movie(s)
6 movie(s)
7 movie(s)
8 movie(s)
9 movie(s)
10 movie(s)
11 movie(s)
12 movie(s)
13 movie(s)
14 movie(s)
15 movie(s)
16 movie(s)
17 movie(s)
18 movie(s)
19 movie(s)
20 movie(s)
21 movie(s)
22 movie(s)
23 movie(s)
24 movie(s)
25 movie(s)
26 movie(s)
27 movie(s)
28 movie(s)
29 movie(s)
30 movie(s)
31 movie(s)
32 movie(s)
33 movie(s)
34 movie(s)
35 movie(s)
36 movie(s)
37 movie(s)
38 movie(s)
39 movie(s)
40 movie(s)
41 movie(s)
42 movie(s)
43 movie(s)
44 movie(s)
45 movie(s)
46 movie(s)
47 movie(s)
48 movie(s)
49 movie(s)
50 movie(s)
20 page(s) done
# Display the scraped DataFrame as the cell's output.
df

Movie Name URL Rating Director Author Date Helpful Text
0 The Rocky Horror Picture Show https://www.imdb.com/title/tt0073629/?ref_=adv... 7.4 Jim Sharman SameirAli Mar 2, 2017 9 <div class="ipc-html-content ipc-html-content-...
1 The Addams Family https://www.imdb.com/title/tt1620981/?ref_=adv... 5.8 Greg Tiernan Darwinskid Oct 16, 2019 26 <div class="ipc-html-content ipc-html-content-...
2 Dhindora https://www.imdb.com/title/tt14650074/?ref_=ad... 9.8 unknown-director unknown-author unknown-date 0 unknown
3 Community https://www.imdb.com/title/tt1439629/?ref_=adv... 8.5 Dan Harmon LeonardOsborneKael Oct 29, 2009 277 <div class="ipc-html-content ipc-html-content-...
4 Home Sweet Home Alone https://www.imdb.com/title/tt11012066/?ref_=ad... -1.0 Dan Mazer unknown-author unknown-date 0 None
... ... ... ... ... ... ... ... ...
995 Suedi https://www.imdb.com/title/tt13502736/?ref_=ad... 6.3 Manuel Concha MrKennyJ Oct 3, 2021 3 <div class="ipc-html-content ipc-html-content-...
996 The Disaster Artist https://www.imdb.com/title/tt3521126/?ref_=adv... 7.4 James Franco Anttell Oct 24, 2018 17 <div class="ipc-html-content ipc-html-content-...
997 Moonshine https://www.imdb.com/title/tt15201944/?ref_=ad... 6.0 Jennifer Finnigan gphelan-87969 Sep 16, 2021 12 <div class="ipc-html-content ipc-html-content-...
998 Shiva Baby https://www.imdb.com/title/tt11317142/?ref_=ad... 7.1 Emma Seligman deloudelouvain Jul 19, 2021 8 <div class="ipc-html-content ipc-html-content-...
999 Gremlins 2: The New Batch https://www.imdb.com/title/tt0099700/?ref_=adv... 6.4 Joe Dante jaws! Dec 5, 1999 30 <div class="ipc-html-content ipc-html-content-...

1000 rows × 8 columns

def _strip_review_markup(html):
    """Remove the wrapper <div> tags and <br/> tags from a review's HTML string."""
    # Fragments are removed one after another, in this order.
    fragments = (
        '<div>',
        '</div>',
        '<br/>',
        '<div class="ipc-html-content ipc-html-content--base">',
    )
    cleaned = html
    for fragment in fragments:
        cleaned = cleaned.replace(fragment, "")
    return cleaned


df['Text'] = df['Text'].apply(_strip_review_markup)
# Display the cleaned DataFrame as the cell's output.
df

Movie Name URL Rating Director Author Date Helpful Text
0 The Rocky Horror Picture Show https://www.imdb.com/title/tt0073629/?ref_=adv... 7.4 Jim Sharman SameirAli Mar 2, 2017 9 Movie starts with a marriage function. Hero pr...
1 The Addams Family https://www.imdb.com/title/tt1620981/?ref_=adv... 5.8 Greg Tiernan Darwinskid Oct 16, 2019 26 Not sure what movie the majority of the critic...
2 Dhindora https://www.imdb.com/title/tt14650074/?ref_=ad... 9.8 unknown-director unknown-author unknown-date 0 unknown
3 Community https://www.imdb.com/title/tt1439629/?ref_=adv... 8.5 Dan Harmon LeonardOsborneKael Oct 29, 2009 277 The promos for this series really turned me of...
4 Home Sweet Home Alone https://www.imdb.com/title/tt11012066/?ref_=ad... -1.0 Dan Mazer unknown-author unknown-date 0 None
... ... ... ... ... ... ... ... ...
995 Suedi https://www.imdb.com/title/tt13502736/?ref_=ad... 6.3 Manuel Concha MrKennyJ Oct 3, 2021 3 Quite funny. For swedish People! Watchable on ...
996 The Disaster Artist https://www.imdb.com/title/tt3521126/?ref_=adv... 7.4 James Franco Anttell Oct 24, 2018 17 First up, I haven't seen the Room (2003), but ...
997 Moonshine https://www.imdb.com/title/tt15201944/?ref_=ad... 6.0 Jennifer Finnigan gphelan-87969 Sep 16, 2021 12 Awesome show! Great acting and scenes of Nova ...
998 Shiva Baby https://www.imdb.com/title/tt11317142/?ref_=ad... 7.1 Emma Seligman deloudelouvain Jul 19, 2021 8 Shiva Baby is better than I thought it would b...
999 Gremlins 2: The New Batch https://www.imdb.com/title/tt0099700/?ref_=adv... 6.4 Joe Dante jaws! Dec 5, 1999 30 gremlins 2 is not a horror movie like the orig...

1000 rows × 8 columns

# Take an independent copy so the Helpful column can be rewritten without
# touching df, then pull that column out for inspection.
copy = df.copy(deep=True)
helpful = copy.loc[:, 'Helpful']
# Display the column as the cell's output.
helpful
0        9
1       26
2        0
3      277
4        0
      ... 
995      3
996     17
997     12
998      8
999     30
Name: Helpful, Length: 1000, dtype: object
def _parse_helpful_count(value):
    """Convert a "helpful" vote label into an integer count.

    Non-string values (such as the int 0 used when no label was found) are
    passed through ``int()``.  Strings may carry surrounding whitespace,
    thousands separators and a trailing ``K``/``k`` meaning "thousands",
    e.g. ``"277"``, ``"1,234"`` or ``"1.2K"``.
    """
    if not isinstance(value, str):
        return int(value)

    label = value.strip().replace(',', '')
    if label[-1:].upper() == 'K':
        # round() rather than int(): int() truncates, so a product that lands
        # just below a whole number because of float rounding would lose a vote.
        return round(float(label[:-1]) * 1000)
    return int(label)


# Every element is an int after conversion, so no follow-up scan for
# leftover "K" suffixes is needed.
helpList = [_parse_helpful_count(label) for label in helpful]

copy['Helpful'] = helpList
# Display the updated DataFrame as the cell's output.
copy

Movie Name URL Rating Director Author Date Helpful Text
0 The Rocky Horror Picture Show https://www.imdb.com/title/tt0073629/?ref_=adv... 7.4 Jim Sharman SameirAli Mar 2, 2017 9 Movie starts with a marriage function. Hero pr...
1 The Addams Family https://www.imdb.com/title/tt1620981/?ref_=adv... 5.8 Greg Tiernan Darwinskid Oct 16, 2019 26 Not sure what movie the majority of the critic...
2 Dhindora https://www.imdb.com/title/tt14650074/?ref_=ad... 9.8 unknown-director unknown-author unknown-date 0 unknown
3 Community https://www.imdb.com/title/tt1439629/?ref_=adv... 8.5 Dan Harmon LeonardOsborneKael Oct 29, 2009 277 The promos for this series really turned me of...
4 Home Sweet Home Alone https://www.imdb.com/title/tt11012066/?ref_=ad... -1.0 Dan Mazer unknown-author unknown-date 0 None
... ... ... ... ... ... ... ... ...
995 Suedi https://www.imdb.com/title/tt13502736/?ref_=ad... 6.3 Manuel Concha MrKennyJ Oct 3, 2021 3 Quite funny. For swedish People! Watchable on ...
996 The Disaster Artist https://www.imdb.com/title/tt3521126/?ref_=adv... 7.4 James Franco Anttell Oct 24, 2018 17 First up, I haven't seen the Room (2003), but ...
997 Moonshine https://www.imdb.com/title/tt15201944/?ref_=ad... 6.0 Jennifer Finnigan gphelan-87969 Sep 16, 2021 12 Awesome show! Great acting and scenes of Nova ...
998 Shiva Baby https://www.imdb.com/title/tt11317142/?ref_=ad... 7.1 Emma Seligman deloudelouvain Jul 19, 2021 8 Shiva Baby is better than I thought it would b...
999 Gremlins 2: The New Batch https://www.imdb.com/title/tt0099700/?ref_=adv... 6.4 Joe Dante jaws! Dec 5, 1999 30 gremlins 2 is not a horror movie like the orig...

1000 rows × 8 columns

# Write the cleaned data to Homework08.csv; index=False leaves the row index out of the file.
copy.to_csv('Homework08.csv', index=False)