mirror of
https://github.com/eRgo35/spotify-parser.git
synced 2025-12-15 04:16:10 +01:00
1.0.0 initial rust release
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
/target
|
||||
**/target
|
||||
.env
|
||||
1906
Cargo.lock
generated
Normal file
1906
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
12
Cargo.toml
Normal file
12
Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "lib-spotify-parser"
|
||||
version = "1.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
regex = "1.10.6"
|
||||
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"] }
|
||||
scraper = "0.19.1"
|
||||
serde = "1.0.204"
|
||||
serde_json = "1.0.122"
|
||||
tokio = { version = "1.39.2", features = ["macros"] }
|
||||
14
src/error.rs
Normal file
14
src/error.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
use std::fmt;
|
||||
|
||||
/// Error type returned by this crate's URL-resolving entry points.
#[derive(Debug)]
pub enum ParseError {
    /// The input could not be handled as a Spotify track, album, or playlist URL.
    InvalidUrl,
}

impl fmt::Display for ParseError {
    /// Writes the human-readable message for the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            ParseError::InvalidUrl => "Invalid URL",
        };
        f.write_str(message)
    }
}

// No source error is wrapped, so the default `Error` methods suffice.
impl std::error::Error for ParseError {}
|
||||
154
src/lib.rs
Normal file
154
src/lib.rs
Normal file
@@ -0,0 +1,154 @@
|
||||
mod error;
|
||||
mod parser;
|
||||
mod retriever;
|
||||
|
||||
use error::ParseError;
|
||||
use parser::{parse_list, parse_single, USER_AGENT};
|
||||
use regex::Regex;
|
||||
use reqwest;
|
||||
use retriever::{retrieve_async_track, retrieve_async_tracks, retrieve_track, retrieve_tracks};
|
||||
|
||||
pub fn retrieve_url(url: &str) -> Result<Vec<String>, ParseError> {
|
||||
let spotify_regex: Regex = Regex::new(r"https?:\/\/(?:embed\.|open\.)spotify\.com\/(track|album|playlist)\/([a-zA-Z0-9]+)(?:\?si=[\w-]+)?").unwrap();
|
||||
|
||||
if let Some(captures) = spotify_regex.captures(url) {
|
||||
let category = captures.get(1).unwrap().as_str();
|
||||
let id = captures.get(2).unwrap().as_str();
|
||||
|
||||
match category {
|
||||
"track" => {
|
||||
let track = retrieve_track(category, id).unwrap();
|
||||
Ok(vec![track])
|
||||
}
|
||||
"playlist" | "album" => {
|
||||
let tracks = retrieve_tracks(category, id).unwrap();
|
||||
Ok(tracks)
|
||||
}
|
||||
_ => Err(ParseError::InvalidUrl),
|
||||
}
|
||||
} else {
|
||||
Err(ParseError::InvalidUrl)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn retrieve_async_url(url: &str) -> Result<Vec<String>, ParseError> {
|
||||
let spotify_regex: Regex = Regex::new(r"https?:\/\/(?:embed\.|open\.)spotify\.com\/(track|album|playlist)\/([a-zA-Z0-9]+)(?:\?si=[\w-]+)?").unwrap();
|
||||
|
||||
if let Some(captures) = spotify_regex.captures(url) {
|
||||
let category = captures.get(1).unwrap().as_str();
|
||||
let id = captures.get(2).unwrap().as_str();
|
||||
|
||||
match category {
|
||||
"track" => {
|
||||
let track = retrieve_async_track(category, id).await.unwrap();
|
||||
Ok(vec![track])
|
||||
}
|
||||
"playlist" | "album" => {
|
||||
let tracks = retrieve_async_tracks(category, id).await.unwrap();
|
||||
Ok(tracks)
|
||||
}
|
||||
_ => Err(ParseError::InvalidUrl),
|
||||
}
|
||||
} else {
|
||||
Err(ParseError::InvalidUrl)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Live Spotify URLs exercised by the tests below; each test performs a real
    // blocking request through `retrieve_url`.
    const TRACK: &str = "https://open.spotify.com/track/4PTG3Z6ehGkBFwjybzWkR8?si=e0a8c8ada8284e43";
    const MULTIPLE_ARTISTS_TRACK: &str =
        "https://open.spotify.com/track/1O0SdrryPeGp6eSSUibdgo?si=4ae58febe9e74eae";
    const PLAYLIST: &str =
        "https://open.spotify.com/playlist/37i9dQZF1DZ06evO05tE88?si=e0c6f44d176f44e6";
    const ALBUM: &str =
        "https://open.spotify.com/album/6eUW0wxWtzkFdaEFsTJto6?si=_grLtlNySNyfJTZr8tP44Q";

    /// Resolves `url` with `retrieve_url` and asserts the result equals `expected`.
    fn expect_tracks(url: &str, expected: Vec<&str>) {
        assert_eq!(retrieve_url(url).unwrap(), expected);
    }

    #[test]
    fn check_track() {
        expect_tracks(TRACK, vec!["Rick Astley - Never Gonna Give You Up"]);
    }

    #[test]
    fn check_multiple_artists_track() {
        expect_tracks(
            MULTIPLE_ARTISTS_TRACK,
            vec!["Will o' the wisp, Rick Astley - Blood on My Tie"],
        );
    }

    #[test]
    fn check_playlist() {
        expect_tracks(
            PLAYLIST,
            vec![
                "Rick Astley - Never Gonna Give You Up",
                "Rick Astley - Take Me to Your Heart (2023 Remaster)",
                "Rick Astley - Cry for Help - Single Edit",
                "Rick Astley - Never Gonna Stop",
                "Rick Astley - Together Forever",
                "Rick Astley - Hold Me in Your Arms (7\" Version)",
                "Rick Astley - Angels on My Side",
                "New Kids On The Block, Salt-N-Pepa, Rick Astley, En Vogue - Bring Back The Time",
                "Rick Astley - Whenever You Need Somebody",
                "Rick Astley - She Wants to Dance with Me (2023 Remaster)",
                "Rick Astley - Dippin My Feet",
                "Rick Astley - My Arms Keep Missing You",
                "Rick Astley - Don't Say Goodbye",
                "Rick Astley - Dance",
                "Rick Astley - Giving Up On Love (7'' Pop Version)",
                "Rick Astley - Never Gonna Give You Up (Cake Mix)",
                "Rick Astley - It Would Take a Strong Strong Man",
                "Rick Astley - Driving Me Crazy",
                "Rick Astley - Beautiful Life",
                "Rick Astley - I Don't Want to Lose Her",
                "Rick Astley - Keep Singing",
                "Rick Astley - Forever and More",
                "Rick Astley - Hopelessly",
                "Rick Astley - When I Fall in Love",
                "Rick Astley - Every One of Us",
                "Rick Astley - High Enough",
                "Rick Astley - Ain't Too Proud to Beg (2023 Remaster)",
                "Rick Astley - I'll Never Let You Down",
                "Trevor Horn, Rick Astley - Owner Of A Lonely Heart",
                "Rick Astley - Letting Go",
                "Rick Astley - Never Knew Love",
                "Rick Astley - Lights Out - Radio Edit",
                "Rick Astley - Try",
                "Rick Astley - Dial My Number (2023 Remaster)",
                "Rick Astley - Wish Away",
                "Rick Astley - Giant",
                "Rick Astley - Move Right Out",
                "Rick Astley - Till Then (Time Stands Still) (2023 Remaster)",
                "Rick Astley - Pray with Me",
                "Rick Astley - I Don't Want to Be Your Lover",
                "Will o' the wisp, Rick Astley - Blood on My Tie",
                "Rick Astley - Can't Help Falling in Love",
                "Rick Astley - I Like the Sun",
                "Rick Astley - She Makes Me",
                "Rick Astley - Body and Soul",
                "Rick Astley - (They Long to Be) Close to You",
                "Rick Astley - Unwanted (Official Song from the Podcast)",
                "Rick Astley - Last Night on Earth",
                "Rick Astley - Everlong - Acoustic Version",
                "Rick Astley - Superman",
            ],
        );
    }

    #[test]
    fn check_album() {
        expect_tracks(
            ALBUM,
            vec![
                "Rick Astley - Never Gonna Give You Up",
                "Rick Astley - Whenever You Need Somebody",
                "Rick Astley - Together Forever",
                "Rick Astley - It Would Take a Strong Strong Man",
                "Rick Astley - The Love Has Gone",
                "Rick Astley - Don't Say Goodbye",
                "Rick Astley - Slipping Away",
                "Rick Astley - No More Looking for Love",
                "Rick Astley - You Move Me",
                "Rick Astley - When I Fall in Love",
            ],
        );
    }
}
|
||||
70
src/parser.rs
Normal file
70
src/parser.rs
Normal file
@@ -0,0 +1,70 @@
|
||||
use scraper::{Html, Selector};
|
||||
use serde_json::Value;
|
||||
use std::error::Error;
|
||||
|
||||
/// A single track as extracted from a Spotify embed page.
#[derive(Clone, Debug)]
pub(crate) struct SpotifyTrack {
    /// Track title as reported by the page metadata.
    pub title: String,
    /// Display string for the track's artist(s).
    pub artist: String,
}
|
||||
|
||||
/// `User-Agent` header value used when building the HTTP clients in `retriever`.
///
/// The string identifies the client as desktop Chrome 91 on 64-bit Windows 10.
// NOTE(review): presumably chosen so Spotify serves the regular embed page to this client — confirm.
pub(crate) const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
|
||||
|
||||
pub(crate) fn parse_single(content: String) -> Result<SpotifyTrack, Box<dyn Error>> {
|
||||
let document = Html::parse_document(&content);
|
||||
let selector = Selector::parse("#__NEXT_DATA__").unwrap();
|
||||
|
||||
if let Some(script_element) = document.select(&selector).next() {
|
||||
let json_str = script_element.inner_html();
|
||||
let json_value: Value = serde_json::from_str(&json_str)?;
|
||||
let metadata = &json_value["props"]["pageProps"]["state"]["data"]["entity"];
|
||||
|
||||
// println!("{metadata:?}");
|
||||
|
||||
let title = metadata["title"].as_str().unwrap().to_string();
|
||||
|
||||
let artists = metadata["artists"].as_array().unwrap();
|
||||
let artists_list: Vec<String> = artists
|
||||
.iter()
|
||||
.map(|artist| artist["name"].as_str().unwrap().to_string())
|
||||
.collect();
|
||||
let artist: String = artists_list.join(", ").to_string();
|
||||
|
||||
let track = SpotifyTrack { title, artist };
|
||||
|
||||
Ok(track)
|
||||
} else {
|
||||
Err("Could not find element".into())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_list(content: String) -> Result<Vec<SpotifyTrack>, Box<dyn Error>> {
|
||||
let document = Html::parse_document(&content);
|
||||
let selector = Selector::parse("#__NEXT_DATA__").unwrap();
|
||||
|
||||
if let Some(script_element) = document.select(&selector).next() {
|
||||
let json_str = script_element.inner_html();
|
||||
let json_value: Value = serde_json::from_str(&json_str)?;
|
||||
let metadata = &json_value["props"]["pageProps"]["state"]["data"]["entity"]["trackList"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
|
||||
// println!("{metadata:?}");
|
||||
|
||||
let tracks: Vec<SpotifyTrack> = metadata
|
||||
.iter()
|
||||
.map(|track| {
|
||||
let title = track["title"].as_str().unwrap().to_string();
|
||||
let artist = track["subtitle"].as_str().unwrap().to_string();
|
||||
|
||||
SpotifyTrack { title, artist }
|
||||
})
|
||||
.collect();
|
||||
|
||||
// println!("{tracks:?}");
|
||||
|
||||
Ok(tracks)
|
||||
} else {
|
||||
Err("Could not find element".into())
|
||||
}
|
||||
}
|
||||
85
src/retriever.rs
Normal file
85
src/retriever.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use crate::*;
|
||||
|
||||
pub fn retrieve_track(category: &str, id: &str) -> Result<String, String> {
|
||||
let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
|
||||
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.use_rustls_tls()
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let response = client.get(&embed_url).send().unwrap();
|
||||
let content = response.text().unwrap();
|
||||
|
||||
let parsed_content = parse_single(content).unwrap();
|
||||
|
||||
Ok(format!(
|
||||
"{} - {}",
|
||||
parsed_content.artist, parsed_content.title
|
||||
))
|
||||
}
|
||||
|
||||
pub fn retrieve_tracks(category: &str, id: &str) -> Result<Vec<String>, String> {
|
||||
let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
|
||||
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.use_rustls_tls()
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let response = client.get(&embed_url).send().unwrap();
|
||||
let content = response.text().unwrap();
|
||||
|
||||
let parsed_content = parse_list(content).unwrap();
|
||||
|
||||
let tracks: Vec<String> = parsed_content
|
||||
.iter()
|
||||
.map(|track| format!("{} - {}", track.artist, track.title))
|
||||
.collect();
|
||||
|
||||
Ok(tracks)
|
||||
}
|
||||
|
||||
pub async fn retrieve_async_track(category: &str, id: &str) -> Result<String, String> {
|
||||
let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.use_rustls_tls()
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let response = client.get(&embed_url).send().await.unwrap();
|
||||
let content = response.text().await.unwrap();
|
||||
|
||||
let parsed_content = parse_single(content).unwrap();
|
||||
|
||||
Ok(format!(
|
||||
"{} - {}",
|
||||
parsed_content.artist, parsed_content.title
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn retrieve_async_tracks(category: &str, id: &str) -> Result<Vec<String>, String> {
|
||||
let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.use_rustls_tls()
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let response = client.get(&embed_url).send().await.unwrap();
|
||||
let content = response.text().await.unwrap();
|
||||
|
||||
let parsed_content = parse_list(content).unwrap();
|
||||
|
||||
let tracks: Vec<String> = parsed_content
|
||||
.iter()
|
||||
.map(|track| format!("{} - {}", track.artist, track.title))
|
||||
.collect();
|
||||
|
||||
Ok(tracks)
|
||||
}
|
||||
Reference in New Issue
Block a user