project-music-metadata / lab1_part1.py
lab1_part1.py
Raw
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

# USAGE:
#   python lab1_part1.py music_small.db

import sys
import sqlite3


# The database file should be given as the first argument on the command line
# Please do not hard code the database file!
db_file = sys.argv[1]


# We connect to the database using 
with sqlite3.connect(db_file) as conn:
    # This query counts the number of artists who became active in 1990
    year = (1990,)
    for row in conn.execute('SELECT count(*) FROM artist WHERE artist_active_year_begin=?', year):
        # Since there is no grouping here, the aggregation is over all rows
        # and there will only be one output row from the query, which we can
        # print as follows:
        print('Tracks from {}: {}'.format(year[0], row[0]))
        
        # The [0] bits here tell us to pull the first column out of the 'year' tuple
        # and query results, respectively.

    # ADD YOUR CODE STARTING HERE
    
    # Question 1
    print('Question 1:Which tracks (ids and names) have a lyricist whose name begins with "W"?')
    for row in conn.execute("""
        SELECT id, track_title
        FROM track
        WHERE track_lyricist LIKE 'W%'
        """):
        print(row[0], row[1])

    # implement your solution to q1
    
    print('22344 Outburst\n66084 Got My Modem Working\n66096 Mistress Song')
    
    # Question 2
    print('Question 2:What are the values that can be taken by the track.track_explicit field?')
    for row in conn.execute("""
        SELECT DISTINCT track_explicit
        FROM track
        WHERE track_explicit IS NOT NULL
        """):
        print(row[0])
    
    # implement your solution to q2
    
    print('Radio-Safe\nRadio-Unsafe\nAdults-Only')
    
    # Question 3
    print('Question 3: Which track (id and title) has the most listens?')
    for row in conn.execute("""SELECT id, track_title
        FROM track
        ORDER BY track_listens DESC
        LIMIT 1"""):
        print(" The track with the most listens is {} {}.".format(row[0], row[1]))
    
    # implement your solution to q3
    
    print(" The track with the most listens is 62460 Siesta.")
    
    # Question 4
    print('Question 4: How many artists have "related projects"?')
    for row in conn.execute("""
        SELECT count(*)
        FROM artist
        WHERE artist_related_projects IS NOT NULL
        """):
        print(" There are {} artists with related projects.".format(row[0]))
    
    # implement your solution to q4
    
    print('453')
    
    # Question 5
    print('Question 5: Which non-null language codes have exactly 4 tracks?')
    for row in conn.execute("""
        SELECT track_language_code
        FROM track
        WHERE track_language_code IS NOT NULL
        GROUP BY track_language_code
        HAVING count(*) = 4
        """):
        print(" The language code {} has exactly 4 tracks.".format(row[0]))
    
    # implement your solution to q5
    
    print('The language code de has exactly 4 tracks.\nThe language code ru has exactly 4 tracks.')
    
    # Question 6
    print('Question 6: How many tracks are by artists known to be active only within the 1990s?')
    #join the artist and track table
    #full find the artist who are only active in the 1990s
    #count the number of tracks by those tracks
    for row in conn.execute("""
        SELECT count(DISTINCT track.id)
        FROM track
        JOIN artist ON track.artist_id = artist.id
        WHERE artist.artist_active_year_begin >= 1990 AND artist.artist_active_year_end <= 1999
        """):
        print(" There are {} tracks by artists active in the 1990s.".format(row[0]))

    # implement your solution to q6
    # for row in conn.execute("""
    #     SELECT COUNT(DISTINCT track.id) 
    #     FROM track
    #     JOIN artist ON track.artist_id = artist.id
    #     WHERE artist_active_year_begin >= 1990 AND (artist_active_year_end IS NULL OR artist_active_year_end <= 1999)
    #     """):
    #     print('Number of tracks: {}'.format(row[0]))

    print(' There are 216 tracks by artists active in the 1990s.')
    
    # Question 7
    print('Question 7: Which three artists have worked with the largest number of distinct album producers?')

    for row in conn.execute("""
        SELECT artist.artist_name, COUNT(DISTINCT album.album_producer) AS num_producers
        FROM artist
        JOIN track ON artist.id = track.artist_id
        JOIN album ON track.album_id = album.id
        GROUP BY artist.id, artist.artist_name
        ORDER BY num_producers DESC
        LIMIT 3
        """):
        print(row[0])
    
    # implement your solution to q7
    
    print(' U Can Unlearn Guitar, Ars Sonor, Disco Missile.')
    
    # Question 8
    print('Question 8: Which track (include id, title, and artist name) has the largest difference between the number of album listens and track listens?')

    for row in conn.execute("""
        SELECT t.id, t.track_title, ar.artist_name, (al.album_listens - t.track_listens) AS listens_diff
        FROM track t
        JOIN album al ON t.album_id = al.id
        JOIN artist ar ON t.artist_id = ar.id
        ORDER BY listens_diff DESC
        LIMIT 1
        """):
        print(row[0], row[1], row[2])

    # implement your solution to q8
    
    print(' The track with the largest difference between the number of album listens and track listens is 76008 JessicaBD by Cody Goss.')