Pages

How to extract people/person names from text? (javascript code)

This is a very simple guide that answers questions such as:

1) How to extract people/person names from text?
2) How to extract company names from text?
3) How to extract entities from text?

For all of the above we will use Yahoo's Content Analysis API.

Here is the simple code (gist):


/**
 * Wires up the entity-extraction demo.
 *
 * Binds a click handler to "#btget" that reads the text from "#tu", sends it
 * to the contentanalysis.analyze YQL table through $.getJSON, and renders the
 * returned entities into "#imgdiv" as pretty-printed JSON. Each rendered item
 * has the keys "Entity" (matched text), "startChar" (the value of text.start)
 * and "etypes" (space-prefixed last path segments of the entity's types).
 *
 * Takes no arguments and returns nothing; all effects go through jQuery.
 */
function init(){
    // Request template; "{text}" is replaced by the URL-encoded user input.
    // The trailing "callback=?" is what makes $.getJSON issue a JSONP request.
    var contentanalysisURL="http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20contentanalysis.analyze%20where%20text%3D%22{text}%22&format=json&callback=?";

    /**
     * Returns `value` unchanged when it is an array, otherwise wraps it in a
     * one-element array.
     * NOTE(review): the service appears to return a lone entity/type as a
     * plain object instead of a one-element array - confirm against a live
     * response. Wrapping makes both shapes iterate the same way.
     */
    function toArray(value){
        return Array.isArray(value)?value:[value];
    }

    /**
     * Escapes the characters that are significant inside HTML element content
     * so that API-derived text is displayed literally instead of being parsed
     * as markup.
     */
    function escapeHtml(str){
        return String(str)
            .replace(/&/g,"&amp;")
            .replace(/</g,"&lt;")
            .replace(/>/g,"&gt;");
    }

    /**
     * JSONP callback: converts the response into a flat list and renders it.
     * Any unexpected response shape ends up in the catch block and shows
     * "error" in the output container.
     */
    function receiveJSON(data){
        //6. receive data

        try{
            //7. get list of all entities (normalized to an array)
            var entities=toArray(data.query.results.entities.entity);

            var out=[];
            for(var i=0;i<entities.length;i++){
                var entity=entities[i];
                var etypes="";
                if(entity.types){
                    //9. get entity types (normalized to an array)
                    var types=toArray(entity.types.type);
                    for(var j=0;j<types.length;j++){
                        var label=types[j].content;
                        // Keep only the part after the last '/' of a
                        // path-like type label; labels without '/' are skipped.
                        if(label.indexOf('/')>=0){
                            etypes=etypes+" "+label.substring(label.lastIndexOf('/')+1);
                        }
                    }
                }
                //8/10. collect entity name, start offset and types
                out.push({
                    "Entity":entity.text.content,
                    "startChar":entity.text.start,
                    "etypes":etypes
                });
            }
            //11. update container with output; escape it because the entity
            // text originates from user input / a remote service.
            $('#imgdiv').html("<pre>"+escapeHtml(JSON.stringify(out,null,'\t'))+"</pre>");
        }catch(e){
            // Keep the user-facing message, but do not lose the cause.
            if(typeof console!=="undefined"&&console.error){
                console.error("contentanalysis response could not be processed",e);
            }
            $('#imgdiv').html("<pre>error</pre>");
        }

    }
    //1. hook click event of btget
    $('#btget').click(function (){
        //2. Get text; it should be 512 characters or less.
        //Note: when the text is too large this will not work
        var text=$('#tu').val();
        //3. Update loading message

        $('#imgdiv').html("<pre>Loading..........</pre>");
        //4. Form a GET request (YQL table). The text must be URL-encoded,
        // otherwise characters such as & # % + corrupt the query string.
        // NOTE(review): a double quote inside the text still terminates the
        // YQL string literal after decoding - confirm the escaping rules of
        // YQL before relying on such input.
        var url=contentanalysisURL.replace("{text}",encodeURIComponent(text));
        //5. inject script block in dom with callback as receiveJSON
        $.getJSON(url,receiveJSON);
    });
}

// Run init once jQuery reports that the document is ready.
$(document).ready(init);


  Working Demo  

Limitations with Code

  • Does not work with large inputs because of the URL length limit: the GET request fails when the URL is too long. To fix this, read this article of mine
  • To extract location names correctly, use Yahoo's geo.placemaker API

No comments:

Post a Comment

Your comments will inspire me. Please leave a comment.