/**
 * Link decoder that extracts downloadable video links from the HTML source of
 * a YouTube watch page.
 *
 * <p>The page is searched for the {@code url_encoded_fmt_stream_map} value;
 * every {@code url=} entry inside it is decoded and turned into a
 * {@link VideoLink} carrying the file URL, the stream type and the quality
 * label.
 *
 * @author Colin Clausen
 */
public class YoutubeDecoder extends AbstractLinkDecoder {

    private static final Logger LOG = LoggerFactory
            .getLogger(YoutubeDecoder.class);

    /** Rating returned by {@link #rateQuality(VideoLink)} for links that cannot be used. */
    private static final int UNSUPPORTED = -1;

    /** Name of the signature parameter as it appears in the stream map. */
    private static final String SIG_PREFIX = "sig=";

    // All patterns are compiled once; Pattern instances are immutable and can
    // be shared, only the Matcher objects are created per call.

    /** Page title up to the trailing "- YouTube" suffix. */
    private static final Pattern TITLE = Pattern.compile(
            "(?<=<title>).*?(?=- YouTube)", Pattern.DOTALL);

    /** The quoted value of the url_encoded_fmt_stream_map entry. */
    private static final Pattern STREAM_MAP = Pattern.compile(
            "(?<=\"url_encoded_fmt_stream_map\": \").*?(?=\")", Pattern.DOTALL);

    /** One comma-separated stream entry, starting right after "url=". */
    private static final Pattern URLS_IN_MAP = Pattern.compile(
            "(?<=url=).*?(?=,|$)", Pattern.DOTALL);

    /** Base URL of a stream, up to the first escaped ampersand. */
    private static final Pattern URL_PART_A = Pattern.compile(
            "http://.*?(?=\\\\u0026)", Pattern.DOTALL);

    /** The "sig=..." parameter of a stream, up to the next escaped ampersand. */
    private static final Pattern URL_PART_B = Pattern.compile(
            "sig=.*?(?=\\\\u0026)", Pattern.DOTALL);

    /** Value of the quality parameter of a stream entry. */
    private static final Pattern QUALITY_MATCH = Pattern.compile(
            "(?<=\\\\u0026quality=).*?(?=,|$)", Pattern.DOTALL);

    /** Value of the type parameter of a stream entry. */
    private static final Pattern TYPE_MATCH = Pattern.compile(
            "(?<=\\\\u0026type=).*?(?=\\\\u0026)", Pattern.DOTALL);

    /**
     * Quality labels this decoder knows how to rank. The constant names are
     * the literal labels found in the stream map, which is why they are lower
     * case. Each constant carries an explicit rating so that the ranking does
     * not depend on declaration order.
     */
    private enum QualityLevel {
        small(0), medium(1), large(2), hd720(3), hd1080(4);

        private final int rating;

        QualityLevel(int rating) {
            this.rating = rating;
        }

        /**
         * Looks up the rating for a quality label.
         *
         * @param label quality label, may be {@code null}
         * @return the rating of the matching level, or {@link #UNSUPPORTED}
         *         when the label is {@code null} or not one of the known levels
         */
        static int ratingOf(String label) {
            if (label == null) {
                return UNSUPPORTED;
            }
            try {
                return valueOf(label).rating;
            } catch (IllegalArgumentException unknownLabel) {
                // Labels outside the enum must not crash the caller; they are
                // simply ranked as unusable.
                return UNSUPPORTED;
            }
        }
    }

    /**
     * Rates a link so that callers can pick the best available stream.
     *
     * @param l the link to rate
     * @return {@code -1} for webm streams (conversion of webm is not
     *         supported) and for quality labels this decoder does not know;
     *         otherwise a value from 0 ({@code small}) to 4 ({@code hd1080}),
     *         higher meaning better quality
     */
    public int rateQuality(VideoLink l) {
        if (l.getFileType().contains("webm")) {
            return UNSUPPORTED;
        }
        return QualityLevel.ratingOf(l.getQuality());
    }

    /**
     * Parses the source of a watch page into the set of links it offers.
     *
     * <p>If the stream map cannot be located, or no entry in it can be
     * parsed, the returned {@link VideoDL} contains no links. Entries that
     * cannot be parsed are logged and skipped.
     *
     * @param fullCode complete HTML source of the page
     * @return the video name together with every link that could be parsed
     * @throws RuntimeException wrapping any unexpected failure during parsing
     */
    public VideoDL getLinks(String fullCode) {
        try {
            String vidName = extractTitle(fullCode);
            List<VideoLink> links = new ArrayList<VideoLink>();

            Matcher linkRegion = STREAM_MAP.matcher(fullCode);
            if (linkRegion.find()) {
                Matcher entries = URLS_IN_MAP.matcher(linkRegion.group());
                while (entries.find()) {
                    VideoLink parsed = parseEntry(vidName, entries.group());
                    if (parsed != null) {
                        links.add(parsed);
                    }
                }
            } else {
                LOG.info("failed to parse the whole page :(");
            }

            return new VideoDL(vidName, this,
                    links.toArray(new VideoLink[links.size()]));
        } catch (Exception ex) {
            LOG.error("could not parse link!", ex);
            throw new RuntimeException(ex);
        }
    }

    /**
     * Determines the name of the video from the page title, falling back to a
     * random UUID when the page has no recognisable title.
     */
    private String extractTitle(String fullCode)
            throws java.io.UnsupportedEncodingException {
        Matcher titleMatcher = TITLE.matcher(fullCode);
        if (!titleMatcher.find()) {
            return UUID.randomUUID().toString();
        }
        String rawTitle = titleMatcher.group().replaceAll("[\\n\\r]", "").trim();
        try {
            return URLDecoder.decode(rawTitle, "UTF-8");
        } catch (IllegalArgumentException malformedEscape) {
            // A title containing a '%' that is not a valid escape sequence
            // cannot be decoded; keep the undecoded text rather than failing.
            return rawTitle;
        }
    }

    /**
     * Turns one entry of the stream map into a link.
     *
     * @param vidName name of the video the entry belongs to
     * @param encodedEntry the entry as found in the stream map, still
     *        URL-encoded (it is decoded twice here)
     * @return the link, or {@code null} when the entry cannot be decoded or
     *         lacks either the base URL or the signature
     */
    private VideoLink parseEntry(String vidName, String encodedEntry)
            throws java.io.UnsupportedEncodingException {
        String link;
        try {
            link = URLDecoder.decode(
                    URLDecoder.decode(encodedEntry, "UTF-8"), "UTF-8");
        } catch (IllegalArgumentException malformedEscape) {
            // One undecodable entry must not discard the remaining ones.
            LOG.info("failed to parse: " + encodedEntry);
            return null;
        }

        Matcher partAFinder = URL_PART_A.matcher(link);
        Matcher partBFinder = URL_PART_B.matcher(link);
        if (!(partAFinder.find() && partBFinder.find())) {
            LOG.info("failed to parse: " + link);
            return null;
        }

        // Rename only the parameter itself ("sig" -> "signature"); the match
        // always starts with "sig=", and the value after it is left untouched
        // even if it happens to contain the letters "sig".
        String signatureParam = "signature="
                + partBFinder.group().substring(SIG_PREFIX.length());
        String fileLink = partAFinder.group() + "&" + signatureParam;

        Matcher qualityFinder = QUALITY_MATCH.matcher(link);
        Matcher typeFinder = TYPE_MATCH.matcher(link);
        String quality = qualityFinder.find() ? qualityFinder.group() : "medium";
        String type = typeFinder.find() ? typeFinder.group() : "unknown";

        return new VideoLink(vidName, fileLink, type, quality, this);
    }

    /**
     * @return the host name of the site this decoder handles
     */
    public String getSiteHostName() {
        return "www.youtube.com";
    }
}