1.0.0 initial rust release

This commit is contained in:
2024-08-05 15:49:34 +02:00
commit 1a1b0ea294
7 changed files with 2244 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
**/target
.env

1906
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

12
Cargo.toml Normal file
View File

@@ -0,0 +1,12 @@
[package]
name = "lib-spotify-parser"
version = "1.0.0"
edition = "2021"
[dependencies]
regex = "1.10.6"
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"] }
scraper = "0.19.1"
serde = "1.0.204"
serde_json = "1.0.122"
tokio = { version = "1.39.2", features = ["macros"] }

14
src/error.rs Normal file
View File

@@ -0,0 +1,14 @@
use std::fmt;
/// Error type returned by the URL retrieval functions of this crate.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers and tests can compare
/// and copy errors directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The given URL could not be handled as a Spotify track, album or
    /// playlist link.
    InvalidUrl,
}

impl fmt::Display for ParseError {
    /// Writes the human-readable message for the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive match: adding a variant later forces a message to be
        // chosen for it instead of silently printing "Invalid URL".
        match self {
            ParseError::InvalidUrl => f.write_str("Invalid URL"),
        }
    }
}

impl std::error::Error for ParseError {}

154
src/lib.rs Normal file
View File

@@ -0,0 +1,154 @@
mod error;
mod parser;
mod retriever;
use error::ParseError;
use parser::{parse_list, parse_single, USER_AGENT};
use regex::Regex;
use reqwest;
use retriever::{retrieve_async_track, retrieve_async_tracks, retrieve_track, retrieve_tracks};
/// Pattern matching `open.spotify.com` / `embed.spotify.com` links.
///
/// Capture group 1 is the category (`track`, `album` or `playlist`), capture
/// group 2 is the alphanumeric Spotify id. An optional `?si=...` suffix is
/// accepted.
const SPOTIFY_URL_PATTERN: &str = r"https?:\/\/(?:embed\.|open\.)spotify\.com\/(track|album|playlist)\/([a-zA-Z0-9]+)(?:\?si=[\w-]+)?";

/// Extracts `(category, id)` from a Spotify URL, or `None` when `url` does
/// not match [`SPOTIFY_URL_PATTERN`].
fn parse_spotify_url(url: &str) -> Option<(&str, &str)> {
    // Compile the pattern once and reuse it across calls (sync and async).
    static SPOTIFY_REGEX: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    let regex = SPOTIFY_REGEX.get_or_init(|| {
        Regex::new(SPOTIFY_URL_PATTERN).expect("SPOTIFY_URL_PATTERN is a valid regex")
    });

    let captures = regex.captures(url)?;
    // Both groups are mandatory in the pattern, so they exist on every match.
    let category = captures.get(1)?.as_str();
    let id = captures.get(2)?.as_str();
    Some((category, id))
}

/// Resolves a Spotify track, album or playlist URL into `"Artist - Title"`
/// strings using blocking HTTP requests.
///
/// A track URL yields a single-element vector; album and playlist URLs yield
/// one entry per track.
///
/// # Errors
///
/// Returns [`ParseError::InvalidUrl`] when `url` is not a supported Spotify
/// link. `ParseError` currently has a single variant, so a failure reported by
/// the retriever is returned through the same variant instead of panicking.
pub fn retrieve_url(url: &str) -> Result<Vec<String>, ParseError> {
    let (category, id) = parse_spotify_url(url).ok_or(ParseError::InvalidUrl)?;
    match category {
        "track" => retrieve_track(category, id)
            .map(|track| vec![track])
            .map_err(|_| ParseError::InvalidUrl),
        "playlist" | "album" => {
            retrieve_tracks(category, id).map_err(|_| ParseError::InvalidUrl)
        }
        _ => Err(ParseError::InvalidUrl),
    }
}

/// Asynchronous counterpart of [`retrieve_url`].
///
/// Resolves a Spotify track, album or playlist URL into `"Artist - Title"`
/// strings using non-blocking HTTP requests.
///
/// # Errors
///
/// Returns [`ParseError::InvalidUrl`] when `url` is not a supported Spotify
/// link. `ParseError` currently has a single variant, so a failure reported by
/// the retriever is returned through the same variant instead of panicking.
pub async fn retrieve_async_url(url: &str) -> Result<Vec<String>, ParseError> {
    let (category, id) = parse_spotify_url(url).ok_or(ParseError::InvalidUrl)?;
    match category {
        "track" => retrieve_async_track(category, id)
            .await
            .map(|track| vec![track])
            .map_err(|_| ParseError::InvalidUrl),
        "playlist" | "album" => retrieve_async_tracks(category, id)
            .await
            .map_err(|_| ParseError::InvalidUrl),
        _ => Err(ParseError::InvalidUrl),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    const TRACK: &str = "https://open.spotify.com/track/4PTG3Z6ehGkBFwjybzWkR8?si=e0a8c8ada8284e43";
    const MULTIPLE_ARTISTS_TRACK: &str =
        "https://open.spotify.com/track/1O0SdrryPeGp6eSSUibdgo?si=4ae58febe9e74eae";
    const PLAYLIST: &str =
        "https://open.spotify.com/playlist/37i9dQZF1DZ06evO05tE88?si=e0c6f44d176f44e6";
    const ALBUM: &str =
        "https://open.spotify.com/album/6eUW0wxWtzkFdaEFsTJto6?si=_grLtlNySNyfJTZr8tP44Q";

    // The URL is rejected before any request is made, so this test runs offline.
    #[test]
    fn rejects_non_spotify_url() {
        assert!(matches!(
            retrieve_url("https://example.com/track/4PTG3Z6ehGkBFwjybzWkR8"),
            Err(ParseError::InvalidUrl)
        ));
    }

    // Only track, album and playlist links are supported; runs offline.
    #[test]
    fn rejects_unsupported_category() {
        assert!(matches!(
            retrieve_url("https://open.spotify.com/artist/0gxyHStUsqpMadRV0Di1Qt"),
            Err(ParseError::InvalidUrl)
        ));
    }

    // The tests below perform real HTTP requests through `retrieve_url` and
    // compare against the live Spotify data, so they need network access.

    #[test]
    fn check_track() {
        let track: Vec<&str> = vec!["Rick Astley - Never Gonna Give You Up"];
        assert_eq!(retrieve_url(TRACK).unwrap(), track);
    }

    #[test]
    fn check_multiple_artists_track() {
        let track: Vec<&str> = vec!["Will o' the wisp, Rick Astley - Blood on My Tie"];
        assert_eq!(retrieve_url(MULTIPLE_ARTISTS_TRACK).unwrap(), track);
    }

    #[test]
    fn check_playlist() {
        let playlist: Vec<&str> = vec![
            "Rick Astley - Never Gonna Give You Up",
            "Rick Astley - Take Me to Your Heart (2023 Remaster)",
            "Rick Astley - Cry for Help - Single Edit",
            "Rick Astley - Never Gonna Stop",
            "Rick Astley - Together Forever",
            "Rick Astley - Hold Me in Your Arms (7\" Version)",
            "Rick Astley - Angels on My Side",
            "New Kids On The Block, Salt-N-Pepa, Rick Astley, En Vogue - Bring Back The Time",
            "Rick Astley - Whenever You Need Somebody",
            "Rick Astley - She Wants to Dance with Me (2023 Remaster)",
            "Rick Astley - Dippin My Feet",
            "Rick Astley - My Arms Keep Missing You",
            "Rick Astley - Don't Say Goodbye",
            "Rick Astley - Dance",
            "Rick Astley - Giving Up On Love (7'' Pop Version)",
            "Rick Astley - Never Gonna Give You Up (Cake Mix)",
            "Rick Astley - It Would Take a Strong Strong Man",
            "Rick Astley - Driving Me Crazy",
            "Rick Astley - Beautiful Life",
            "Rick Astley - I Don't Want to Lose Her",
            "Rick Astley - Keep Singing",
            "Rick Astley - Forever and More",
            "Rick Astley - Hopelessly",
            "Rick Astley - When I Fall in Love",
            "Rick Astley - Every One of Us",
            "Rick Astley - High Enough",
            "Rick Astley - Ain't Too Proud to Beg (2023 Remaster)",
            "Rick Astley - I'll Never Let You Down",
            "Trevor Horn, Rick Astley - Owner Of A Lonely Heart",
            "Rick Astley - Letting Go",
            "Rick Astley - Never Knew Love",
            "Rick Astley - Lights Out - Radio Edit",
            "Rick Astley - Try",
            "Rick Astley - Dial My Number (2023 Remaster)",
            "Rick Astley - Wish Away",
            "Rick Astley - Giant",
            "Rick Astley - Move Right Out",
            "Rick Astley - Till Then (Time Stands Still) (2023 Remaster)",
            "Rick Astley - Pray with Me",
            "Rick Astley - I Don't Want to Be Your Lover",
            "Will o' the wisp, Rick Astley - Blood on My Tie",
            "Rick Astley - Can't Help Falling in Love",
            "Rick Astley - I Like the Sun",
            "Rick Astley - She Makes Me",
            "Rick Astley - Body and Soul",
            "Rick Astley - (They Long to Be) Close to You",
            "Rick Astley - Unwanted (Official Song from the Podcast)",
            "Rick Astley - Last Night on Earth",
            "Rick Astley - Everlong - Acoustic Version",
            "Rick Astley - Superman",
        ];
        assert_eq!(retrieve_url(PLAYLIST).unwrap(), playlist);
    }

    #[test]
    fn check_album() {
        let album: Vec<&str> = vec![
            "Rick Astley - Never Gonna Give You Up",
            "Rick Astley - Whenever You Need Somebody",
            "Rick Astley - Together Forever",
            "Rick Astley - It Would Take a Strong Strong Man",
            "Rick Astley - The Love Has Gone",
            "Rick Astley - Don't Say Goodbye",
            "Rick Astley - Slipping Away",
            "Rick Astley - No More Looking for Love",
            "Rick Astley - You Move Me",
            "Rick Astley - When I Fall in Love",
        ];
        assert_eq!(retrieve_url(ALBUM).unwrap(), album);
    }
}

70
src/parser.rs Normal file
View File

@@ -0,0 +1,70 @@
use scraper::{Html, Selector};
use serde_json::Value;
use std::error::Error;
/// Title and artist text of a single track as extracted by the parser.
///
/// `PartialEq` and `Eq` are derived so parsed tracks can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SpotifyTrack {
    /// Track title, taken from the `title` field of the page data.
    pub title: String,
    /// Artist text: the artist names joined with `", "` for a single track,
    /// or the `subtitle` field of a track-list entry.
    pub artist: String,
}
/// `User-Agent` header value used by the HTTP clients in the retriever module.
///
/// The value is a desktop Chrome 91 on Windows 10 browser string.
// NOTE(review): presumably a browser-like agent is needed for Spotify to serve
// the embed page with its `__NEXT_DATA__` payload — confirm.
pub(crate) const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
/// Extracts a single track from the HTML of a Spotify embed page.
///
/// The page data is read from the JSON inside the `#__NEXT_DATA__` element,
/// under `props.pageProps.state.data.entity`. The track title comes from its
/// `title` field and the artist text is the `name` of every entry in
/// `artists`, joined with `", "`.
///
/// # Errors
///
/// Returns an error when the `#__NEXT_DATA__` element is missing, when its
/// content is not valid JSON, or when the expected `title`, `artists` or
/// `name` fields are absent or have an unexpected type.
pub(crate) fn parse_single(content: String) -> Result<SpotifyTrack, Box<dyn Error>> {
    let document = Html::parse_document(&content);
    let selector =
        Selector::parse("#__NEXT_DATA__").expect("`#__NEXT_DATA__` is a valid CSS selector");
    let script_element = document
        .select(&selector)
        .next()
        .ok_or("Could not find element")?;

    let json_value: Value = serde_json::from_str(&script_element.inner_html())?;
    let metadata = &json_value["props"]["pageProps"]["state"]["data"]["entity"];

    // Indexing a `Value` with a missing key yields `Null`, so a changed page
    // layout shows up here as a failed `as_str` / `as_array` conversion.
    let title = metadata["title"]
        .as_str()
        .ok_or("Missing track title")?
        .to_string();
    let artist = metadata["artists"]
        .as_array()
        .ok_or("Missing track artists")?
        .iter()
        .map(|artist| artist["name"].as_str().ok_or("Missing artist name"))
        .collect::<Result<Vec<_>, _>>()?
        .join(", ");

    Ok(SpotifyTrack { title, artist })
}
pub(crate) fn parse_list(content: String) -> Result<Vec<SpotifyTrack>, Box<dyn Error>> {
let document = Html::parse_document(&content);
let selector = Selector::parse("#__NEXT_DATA__").unwrap();
if let Some(script_element) = document.select(&selector).next() {
let json_str = script_element.inner_html();
let json_value: Value = serde_json::from_str(&json_str)?;
let metadata = &json_value["props"]["pageProps"]["state"]["data"]["entity"]["trackList"]
.as_array()
.unwrap();
// println!("{metadata:?}");
let tracks: Vec<SpotifyTrack> = metadata
.iter()
.map(|track| {
let title = track["title"].as_str().unwrap().to_string();
let artist = track["subtitle"].as_str().unwrap().to_string();
SpotifyTrack { title, artist }
})
.collect();
// println!("{tracks:?}");
Ok(tracks)
} else {
Err("Could not find element".into())
}
}

85
src/retriever.rs Normal file
View File

@@ -0,0 +1,85 @@
use crate::*;
/// Fetches the Spotify embed page for `spotify:{category}:{id}` with a
/// blocking HTTP client and returns the track as `"Artist - Title"`.
///
/// # Errors
///
/// Returns the error message as a `String` when the HTTP client cannot be
/// built, the request fails, the server answers with an error status, the
/// body cannot be read, or the page cannot be parsed.
pub fn retrieve_track(category: &str, id: &str) -> Result<String, String> {
    let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
    let client = reqwest::blocking::Client::builder()
        .use_rustls_tls()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|err| err.to_string())?;

    // `send` succeeds on HTTP error statuses, so check the status explicitly
    // before handing the body to the parser.
    let content = client
        .get(&embed_url)
        .send()
        .and_then(|response| response.error_for_status())
        .and_then(|response| response.text())
        .map_err(|err| err.to_string())?;

    let track = parse_single(content).map_err(|err| err.to_string())?;
    Ok(format!("{} - {}", track.artist, track.title))
}
/// Fetches the Spotify embed page for `spotify:{category}:{id}` with a
/// blocking HTTP client and returns every listed track as `"Artist - Title"`.
///
/// # Errors
///
/// Returns the error message as a `String` when the HTTP client cannot be
/// built, the request fails, the server answers with an error status, the
/// body cannot be read, or the page cannot be parsed.
pub fn retrieve_tracks(category: &str, id: &str) -> Result<Vec<String>, String> {
    let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
    let client = reqwest::blocking::Client::builder()
        .use_rustls_tls()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|err| err.to_string())?;

    // `send` succeeds on HTTP error statuses, so check the status explicitly
    // before handing the body to the parser.
    let content = client
        .get(&embed_url)
        .send()
        .and_then(|response| response.error_for_status())
        .and_then(|response| response.text())
        .map_err(|err| err.to_string())?;

    let parsed_tracks = parse_list(content).map_err(|err| err.to_string())?;
    Ok(parsed_tracks
        .iter()
        .map(|track| format!("{} - {}", track.artist, track.title))
        .collect())
}
/// Fetches the Spotify embed page for `spotify:{category}:{id}` with an
/// asynchronous HTTP client and returns the track as `"Artist - Title"`.
///
/// # Errors
///
/// Returns the error message as a `String` when the HTTP client cannot be
/// built, the request fails, the server answers with an error status, the
/// body cannot be read, or the page cannot be parsed.
pub async fn retrieve_async_track(category: &str, id: &str) -> Result<String, String> {
    let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
    let client = reqwest::Client::builder()
        .use_rustls_tls()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|err| err.to_string())?;

    // `send` succeeds on HTTP error statuses, so check the status explicitly
    // before handing the body to the parser.
    let response = client
        .get(&embed_url)
        .send()
        .await
        .and_then(|response| response.error_for_status())
        .map_err(|err| err.to_string())?;
    let content = response.text().await.map_err(|err| err.to_string())?;

    let track = parse_single(content).map_err(|err| err.to_string())?;
    Ok(format!("{} - {}", track.artist, track.title))
}
/// Fetches the Spotify embed page for `spotify:{category}:{id}` with an
/// asynchronous HTTP client and returns every listed track as
/// `"Artist - Title"`.
///
/// # Errors
///
/// Returns the error message as a `String` when the HTTP client cannot be
/// built, the request fails, the server answers with an error status, the
/// body cannot be read, or the page cannot be parsed.
pub async fn retrieve_async_tracks(category: &str, id: &str) -> Result<Vec<String>, String> {
    let embed_url = format!("https://embed.spotify.com/?uri=spotify:{}:{}", category, id);
    let client = reqwest::Client::builder()
        .use_rustls_tls()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|err| err.to_string())?;

    // `send` succeeds on HTTP error statuses, so check the status explicitly
    // before handing the body to the parser.
    let response = client
        .get(&embed_url)
        .send()
        .await
        .and_then(|response| response.error_for_status())
        .map_err(|err| err.to_string())?;
    let content = response.text().await.map_err(|err| err.to_string())?;

    let parsed_tracks = parse_list(content).map_err(|err| err.to_string())?;
    Ok(parsed_tracks
        .iter()
        .map(|track| format!("{} - {}", track.artist, track.title))
        .collect())
}