Update scrape-internet-radio-manual.sh
This commit is contained in:
parent
c642f2f31d
commit
901f6d1890
@ -4,10 +4,10 @@
|
|||||||
# Dump the list of links found on the stations index page and keep only the
# lines that contain the stations URL; the result is stored in links.txt.
lynx --dump --listonly --nonumbers https://www.internet-radio.com/stations/ \
  | grep 'https://www.internet-radio.com/stations/' \
  > links.txt
|
# Dump the list of links found on the stations index page and keep only the
# lines that contain the stations URL; the result is stored in links.txt.
lynx --dump --listonly --nonumbers https://www.internet-radio.com/stations/ \
  | grep 'https://www.internet-radio.com/stations/' \
  > links.txt
|
||||||
|
|
||||||
# strip the unnecessary part of the links (we'll add it back later in the for loop)
|
# strip the unnecessary part of the links (we'll add it back later in the for loop)
|
||||||
# Reduce every line of links.txt to a bare name and write the unique, sorted
# names to links2.txt. The sed expressions run in order on each line:
#   1. drop the stations URL prefix
#   2. remove every remaining slash
#   3. discard lines that ended up empty
cat links.txt \
  | sed -e 's!https://www.internet-radio.com/stations/!!' \
        -e 's/\///g' \
        -e '/^$/d' \
  | sort \
  | uniq \
  > links2.txt
|
# Reduce every line of links.txt to a bare name and write the unique, sorted
# names to links2.txt. The sed expressions run in order on each line:
#   1. drop the stations URL prefix
#   2. remove every remaining slash
#   3. discard lines that ended up empty
#   4. encode each space as %20 so the name can be placed in a URL
cat links.txt \
  | sed -e 's!https://www.internet-radio.com/stations/!!' \
        -e 's/\///g' \
        -e '/^$/d' \
        -e 's/ /%20/g' \
  | sort \
  | uniq \
  > links2.txt
|
||||||
|
|
||||||
# scrape links of the streams
|
# scrape links of the streams
|
||||||
# Outer loop: the page suffix -- "" for the first request, then page2..page10.
# Inner loop: every name listed in links2.txt.
# Each fetched page is reduced to its stream links, which are appended to
# <name>.txt; the script pauses one second after every request.
for i in "" page{2..10}
do
  for j in $(cat links2.txt)
  do
    # Pipeline stages, in order:
    #   htmlq      - print the href attribute of every <a> element
    #   grep       - keep the hrefs that mention .m3u
    #   cut        - drop the first 36 bytes of each line
    #   awk -F (x3)- keep only the text before the given separator
    #   awk seen   - drop lines already printed for this page
    #   sed        - drop empty lines
    curl https://www.internet-radio.com/stations/$j/$i.html \
      | htmlq --attribute href a \
      | grep '.m3u' \
      | cut -b 37- \
      | awk -F '\\listen' '{print $1""}' \
      | awk -F '\\.m3u' '{print $1""}' \
      | awk -F '\\&t=' '{print $1""}' \
      | awk '!seen[$0]++' \
      | sed '/^$/d' \
      >> $j.txt
    sleep 1
  done
done
|
# Outer loop: the page suffix -- "" for the first request, then page2..page10.
# Inner loop: every name listed in links2.txt.
# Each fetched page is reduced to its stream links, which are appended to
# <name>.txt; a progress line is printed after every request.
for i in "" page{2..10}
do
  for j in $(cat links2.txt)
  do
    # Pipeline stages, in order:
    #   curl -s    - fetch the page without the progress meter
    #   htmlq      - print the href attribute of every <a> element
    #   grep       - keep the hrefs that mention .m3u
    #   cut        - drop the first 36 bytes of each line
    #   awk -F (x3)- keep only the text before the given separator
    #   awk seen   - drop lines already printed for this page
    #   sed        - drop empty lines
    #   awk length - keep only lines longer than 10 characters
    curl -s https://www.internet-radio.com/stations/$j/$i.html \
      | htmlq --attribute href a \
      | grep '.m3u' \
      | cut -b 37- \
      | awk -F '\\listen' '{print $1""}' \
      | awk -F '\\.m3u' '{print $1""}' \
      | awk -F '\\&t=' '{print $1""}' \
      | awk '!seen[$0]++' \
      | sed '/^$/d' \
      | awk 'length>10' \
      >> $j.txt
    echo "$j - $i scraped"
  done
done
|
||||||
|
|
||||||
# a few links have more than 10 pages; the longest is pop with 50 pages, so if you absolutely need all of them you can scrape those with a longer loop
|
# a few links have more than 10 pages; the longest is pop with 50 pages, so if you absolutely need all of them you can scrape those with a longer loop
|
||||||
# here is the list of the bigger links = Country Talk 80s Oldies Dance Gospel Christian Rock Pop
|
# here is the list of the bigger links = Country Talk 80s Oldies Dance Gospel Christian Rock Pop
|
||||||
@ -15,6 +15,9 @@ for i in "" page{2..10} ; do for j in $(cat links2.txt) ; do curl https://www.in
|
|||||||
# convert links to m3u stream files
|
# convert links to m3u stream files
|
||||||
# Build one playlist per name in links2.txt: every line of <name>.txt gets an
# "#EXTINF:-1" line in front of it, and "#EXTM3U" is placed at the very top.
# NOTE: relies on sed turning \n in the replacement text into a newline.
for i in $(cat links2.txt)
do
  sed "s/^/#EXTINF:-1\n/" $i.txt \
    | sed '1s/^/#EXTM3U\n/' \
    > $i.m3u
done
|
# Build one playlist per name in links2.txt: every line of <name>.txt gets an
# "#EXTINF:-1" line in front of it, and "#EXTM3U" is placed at the very top.
# NOTE: relies on sed turning \n in the replacement text into a newline.
for i in $(cat links2.txt)
do
  sed "s/^/#EXTINF:-1\n/" $i.txt \
    | sed '1s/^/#EXTM3U\n/' \
    > $i.m3u
done
|
||||||
|
|
||||||
|
# replace %20 in filenames with space
|
||||||
|
# Rename every *.m3u file in the current directory so that each "%20" in its
# name becomes a real space.
#
# Differences from a plain `sed 's/%20/ /'` rename:
#   - ALL occurrences are replaced, not just the first one, so
#     "Drum%20and%20Bass.m3u" becomes "Drum and Bass.m3u";
#   - files whose name contains no "%20" are skipped instead of being passed
#     to mv with identical source and target (which mv reports as an error);
#   - when no *.m3u file exists the unexpanded glob is ignored.
# Returns: the status of the last mv that ran, or 0 if nothing was renamed last.
unescape_m3u_names() {
  local f target
  for f in *.m3u; do
    # An unmatched glob stays as the literal string "*.m3u"; skip it.
    [ -e "$f" ] || continue
    # "//" selects replace-all; the pattern itself is the literal text %20.
    target=${f//%20/ }
    if [ "$f" != "$target" ]; then
      mv -- "$f" "$target"
    fi
  done
}
unescape_m3u_names
|
||||||
|
|
||||||
# move stream to git folder
|
# move stream to git folder
|
||||||
# Moves every .m3u file in the current directory into the hard-coded
# destination directory below; adjust the path when running elsewhere.
mv *.m3u c:/git/m3u-radio-music-playlists/internet-radio/
|
# Moves every .m3u file in the current directory into the hard-coded
# destination directory below; adjust the path when running elsewhere.
mv *.m3u c:/git/m3u-radio-music-playlists/internet-radio/
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user