1) How to extract people/person names from text?
2) How to extract company names from text?
3) How to extract entities from text?
For all of the above we will use Yahoo's Content Analysis API.
Here is some simple code (gist):
/**
 * Wires up the entity-extraction demo page.
 *
 * Registers a click handler on #btget that reads the text from #tu, sends it
 * to the YQL contentanalysis.analyze table through $.getJSON (JSONP, because
 * the URL ends in "callback=?"), and renders the returned entities as
 * tab-indented JSON inside #imgdiv. Each rendered entry has the keys
 * "Entity", "startChar" and "etypes".
 */
function init(){
    // YQL query URL; "{text}" is a placeholder for the (escaped) user text.
    var contentanalysisURL="http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20contentanalysis.analyze%20where%20text%3D%22{text}%22&format=json&callback=?";

    /**
     * Normalizes a value to an array: null/undefined -> [], array -> itself,
     * anything else -> a one-element array.
     * NOTE(review): assumes the service may deliver a lone entity/type as a
     * bare object instead of a one-element array -- confirm against real
     * responses. Without this, such a value has no .length and is skipped.
     */
    function toArray(value){
        if(value===undefined||value===null){
            return [];
        }
        return Array.isArray(value)?value:[value];
    }

    /**
     * Escapes the characters that are significant in HTML element content so
     * that entity text echoed back from user input cannot inject markup.
     */
    function escapeHtml(str){
        return str.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;");
    }

    /**
     * Builds the request URL for the given text.
     * The text is percent-encoded so that characters such as & # % = ? cannot
     * break the query string or jQuery's "callback=?" placeholder. Encoding
     * also removes every "$", which String.prototype.replace would otherwise
     * interpret as a special replacement pattern (e.g. "$&").
     */
    function buildURL(text){
        // The text sits inside a double-quoted YQL string literal.
        // NOTE(review): assumes YQL accepts backslash escapes for \ and "
        // inside string literals -- verify against the YQL documentation.
        var yqlLiteral=String(text).replace(/\\/g,"\\\\").replace(/"/g,'\\"');
        return contentanalysisURL.replace("{text}",encodeURIComponent(yqlLiteral));
    }

    /**
     * JSONP callback: converts the service response into a flat list and
     * renders it. Shows "error" if the response does not have the expected
     * shape.
     */
    function receiveJSON(data){
        //6. receive data
        try{
            //7. get the list of all entities
            // NOTE(review): a null/absent "results" is treated as "no entities"
            // and rendered as an empty list -- confirm this is what the service
            // returns when nothing is found.
            var results=data.query.results;
            var et=toArray(results&&results.entities&&results.entities.entity);
            var out=[];
            for(var i=0;i<et.length;i++){
                var entity=et[i];
                //8. get entity name and its start offset in the submitted text
                var content=entity.text.content;
                var startChar=entity.text.start;
                var etypes="";
                if(entity.types){
                    //9. get entity types
                    var typ=toArray(entity.types.type);
                    for(var j=0;j<typ.length;j++){
                        var typeName=typ[j].content;
                        // keep only the last path segment, e.g. "/a/b" -> "b"
                        if(typeName.indexOf('/')>=0){
                            etypes=etypes+" "+typeName.substring(typeName.lastIndexOf('/')+1);
                        }
                    }
                }
                //10. append to the output array
                out.push({
                    "Entity":content,
                    "startChar":startChar,
                    "etypes":etypes
                });
            }
            //11. update container with the (HTML-escaped) output
            $('#imgdiv').html("<pre>"+escapeHtml(JSON.stringify(out,null,'\t'))+"</pre>");
        }catch(e){
            // Keep the failure visible to developers as well as to the user.
            if(typeof console!=="undefined"&&console.error){
                console.error(e);
            }
            $('#imgdiv').html("<pre>error</pre>");
        }
    }

    //1. hook click event of btget
    $('#btget').click(function (){
        //2. Get text; its length should be 512 characters or less.
        //Note: when the text is too large this will not work
        var text=$('#tu').val();
        //3. Update loading message
        $('#imgdiv').html("<pre>Loading..........</pre>");
        //4. Form a GET request (YQL table)
        var url=buildURL(text);
        //5. inject script block in DOM with receiveJSON as the callback
        $.getJSON(url,receiveJSON);
    });
}
// Run init once the DOM is ready ($(fn) is jQuery's shorthand for $(document).ready(fn)).
$(init);
Limitations with Code
- Does not work with large data because of the URL size limit: a GET request fails when the URL is too long. To fix this, read my article on the subject.
- To extract location names correctly, use Yahoo's geo.placemaker API.
No comments:
Post a Comment
Your comments will inspire me. Please leave a comment.