Visual Basic YouTube Video Info Grabber

Introduction: Welcome to my tutorial on how to create a simple program that grabs the information of a video from YouTube. It will scrape the title, description, view count, like count and dislike count. You can also make it scrape the comments, related videos, uploader, etc. Steps of Creation: Step 1: The first thing we need to do is import System.Net so we can use HttpWebRequest and HttpWebResponse. We also need Regex (from System.Text.RegularExpressions) and a pre-made function to extract a String between two String points:
  1.         Imports System.Net
  2.         Imports System.Text.RegularExpressions
  3.  
  4.     Private Function GetBetween(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String, Optional ByVal Index As Integer = 0) As String
  5.         Return Regex.Split(Regex.Split(Source, Str1)(Index + 1), Str2)(0)
  6.     End Function
Step 2: Now add a TextBox to your form to contain the video URL and a Button to begin the process. In the button's Click event we will first open a SaveFileDialog to select a saving path, and then verify the URL:
  1.     Dim fs As SaveFileDialog = New SaveFileDialog
  2.     fs.RestoreDirectory = True
  3.     fs.Filter = "txt files (*.txt)|*.txt"
  4.     fs.FilterIndex = 1
  5.     fs.ShowDialog()
  6.     Dim url As String = ""
  7.     If (TextBox1.Text.ToLower().Contains("youtube")) Then
  8.         If (TextBox1.Text.ToLower().StartsWith("http://") Or TextBox1.Text.ToLower().StartsWith("https://")) Then
  9.             url = TextBox1.Text
  10.         Else
  11.             If (TextBox1.Text.ToLower().StartsWith("www.")) Then
  12.                 url = "http://" & TextBox1.Text
  13.             Else
  14.                 url = "http://www." & TextBox1.Text
  15.             End If
  16.         End If
  17.     ElseIf (TextBox1.Text.ToLower().StartsWith("/watch")) Then
  18.         url = "http://www.youtube.com" & TextBox1.Text.ToLower()
  19.     ElseIf (TextBox1.Text.ToLower().StartsWith("watch")) Then
  20.         url = "http://www.youtube.com/" & TextBox1.Text.ToLower()
  21.     End If
Step 3: Now, we want to send a request to the url and read the response as our source page String. Then we want to extract each piece of information and run it through a custom function which we will make in a minute:
  1.     Dim r As HttpWebRequest = HttpWebRequest.Create(url)
  2.     Dim re As HttpWebResponse = r.GetResponse
  3.     Dim src As String = New System.IO.StreamReader(re.GetResponseStream()).ReadToEnd()
  4.     Dim title2 As String = GetBetween(src, "<span id=""eow-title""", ">")
  5.     Dim title As String = GetBetween(title2, "title=""", """")
  6.     Dim desc As String = GetBetween(src, "<p id=""eow-description"" >", "</p>")
  7.     Dim likes As String = GetBetween(src, "<span class=""likes-count"">", "</span")
  8.     Dim dislikes As String = GetBetween(src, "<span class=""dislikes-count"">", "</span")
  9.     Dim views As String = GetBetween(src, "<span class=""watch-view-count "" >", "</span")
  10.     title = removeExtras(title, False)
  11.     desc = removeExtras(desc, False)
  12.     likes = removeExtras(likes)
  13.     dislikes = removeExtras(dislikes)
  14.     views = removeExtras(views)
  15.        
  16.         Using sw As New System.IO.StreamWriter(fs.FileName)
  17.         sw.WriteLine(title)
  18.         sw.WriteLine(desc)
  19.         sw.WriteLine("Likes: " & likes)
  20.         sw.WriteLine("Dislikes: " & dislikes)
  21.         sw.WriteLine("Total Views: " & views)
  22.     End Using
Once we have parsed all the information, we write it to the save path. Step 4: Now, for the custom function. This is just a function to: - Remove spaces from the view count, like count and dislike count. - Replace &quot; from HTML with a quotation mark ("). - Replace &#39; from HTML with an apostrophe ('). - Remove HTML tags, mainly from the description (links etc.).
  1.     Private Function removeExtras(ByVal s As String, Optional ByVal removeSpaces As Boolean = True)
  2.         Dim ret As String = s
  3.         If (s.Contains(" ") And removeSpaces) Then
  4.             ret = ""
  5.             For Each c As String In s
  6.                 If (Not c = " ") Then ret &= c
  7.             Next
  8.         End If
  9.         If (ret.Contains("<") And ret.Contains(">")) Then
  10.             Dim sa As Boolean = True
  11.             Dim temp As String = ""
  12.             For Each c As String In ret
  13.                 If (c = "<") Then sa = False
  14.                 If (c = ">") Then sa = True
  15.                 If (Not c = "<" And Not c = ">" And sa) Then
  16.                     temp &= c
  17.                 End If
  18.             Next
  19.             ret = temp
  20.         End If
  21.         If (ret.Contains("&quot;")) Then
  22.             ret = ret.Replace("&quot;", """")
  23.         End If
  24.         If (ret.Contains("&#39;")) Then ret = ret.Replace("&#39;", "'")
  25.         Return ret
  26.     End Function
Project Complete! Below is the full source code and a download to the project files:
  1. Imports System.Net
  2. Imports System.Text.RegularExpressions
  3. Public Class Form1
  4.     Private Function GetBetween(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String, Optional ByVal Index As Integer = 0) As String
  5.         Return Regex.Split(Regex.Split(Source, Str1)(Index + 1), Str2)(0)
  6.     End Function
  7.  
  8.     Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
  9.         Dim fs As SaveFileDialog = New SaveFileDialog
  10.         fs.RestoreDirectory = True
  11.         fs.Filter = "txt files (*.txt)|*.txt"
  12.         fs.FilterIndex = 1
  13.         fs.ShowDialog()
  14.         Dim url As String = ""
  15.         If (TextBox1.Text.ToLower().Contains("youtube")) Then
  16.             If (TextBox1.Text.ToLower().StartsWith("http://") Or TextBox1.Text.ToLower().StartsWith("https://")) Then
  17.                 url = TextBox1.Text
  18.             Else
  19.                 If (TextBox1.Text.ToLower().StartsWith("www.")) Then
  20.                     url = "http://" & TextBox1.Text
  21.                 Else
  22.                     url = "http://www." & TextBox1.Text
  23.                 End If
  24.             End If
  25.         ElseIf (TextBox1.Text.ToLower().StartsWith("/watch")) Then
  26.             url = "http://www.youtube.com" & TextBox1.Text.ToLower()
  27.         ElseIf (TextBox1.Text.ToLower().StartsWith("watch")) Then
  28.             url = "http://www.youtube.com/" & TextBox1.Text.ToLower()
  29.         End If
  30.         Dim r As HttpWebRequest = HttpWebRequest.Create(url)
  31.         Dim re As HttpWebResponse = r.GetResponse
  32.         Dim src As String = New System.IO.StreamReader(re.GetResponseStream()).ReadToEnd()
  33.         Dim title2 As String = GetBetween(src, "<span id=""eow-title""", ">")
  34.         Dim title As String = GetBetween(title2, "title=""", """")
  35.         Dim desc As String = GetBetween(src, "<p id=""eow-description"" >", "</p>")
  36.         Dim likes As String = GetBetween(src, "<span class=""likes-count"">", "</span")
  37.         Dim dislikes As String = GetBetween(src, "<span class=""dislikes-count"">", "</span")
  38.         Dim views As String = GetBetween(src, "<span class=""watch-view-count "" >", "</span")
  39.         title = removeExtras(title, False)
  40.         desc = removeExtras(desc, False)
  41.         likes = removeExtras(likes)
  42.         dislikes = removeExtras(dislikes)
  43.         views = removeExtras(views)
  44.         Using sw As New System.IO.StreamWriter(fs.FileName)
  45.             sw.WriteLine(title)
  46.             sw.WriteLine(desc)
  47.             sw.WriteLine("Likes: " & likes)
  48.             sw.WriteLine("Dislikes: " & dislikes)
  49.             sw.WriteLine("Total Views: " & views)
  50.         End Using
  51.     End Sub
  52.  
  53.     Private Function removeExtras(ByVal s As String, Optional ByVal removeSpaces As Boolean = True)
  54.         Dim ret As String = s
  55.         If (s.Contains(" ") And removeSpaces) Then
  56.             ret = ""
  57.             For Each c As String In s
  58.                 If (Not c = " ") Then ret &= c
  59.             Next
  60.         End If
  61.         If (ret.Contains("<") And ret.Contains(">")) Then
  62.             Dim sa As Boolean = True
  63.             Dim temp As String = ""
  64.             For Each c As String In ret
  65.                 If (c = "<") Then sa = False
  66.                 If (c = ">") Then sa = True
  67.                 If (Not c = "<" And Not c = ">" And sa) Then
  68.                     temp &= c
  69.                 End If
  70.             Next
  71.             ret = temp
  72.         End If
  73.         If (ret.Contains("&quot;")) Then
  74.             ret = ret.Replace("&quot;", """")
  75.         End If
  76.         If (ret.Contains("&#39;")) Then ret = ret.Replace("&#39;", "'")
  77.         Return ret
  78.     End Function
  79. End Class

Comments

It is not working.I tryied to fix it and i did.But still not good the final result it gives u the html lol. i Fixed this instead of Index + 1 u should do index + 0 cuz 0 in programming means 1 faller. look Private Function GetBetween(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String, Optional ByVal Index As Integer = 0) As String Return Regex.Split(Regex.Split(Source, Str1)(Index + 0), Str2)(0) End Function This is how it works.And i changed it but the final result is an html file with the info of video.still not complete dude i wasted my time on that shit

In reply to by ProgrammerROmania (not verified)

Hi, Thanks for your fix, it works for me too, but how can i get only Video title, instead of complicated text in .txt file. Thks

Add new comment