Rensselaer Center for Open Source Software

Added collocation feature extraction

2 files changed, 11 lines added, 1 line removed

Changes

--- analysis/sentiment/learner.py da4e5bcbdf66035be8f121754d4327886241e463
+++ analysis/sentiment/learner.py d3794e54a0bb48f5e87bc083d4cc928583c855a3
@@ -41,4 +41,14 @@
-    return dict([(word,True) for word in word_bag])  
+    d = dict([(word,True) for word in word_bag])
+
+    #Generate collocations
+    bigram_measures = nltk.collocations.BigramAssocMeasures()
+    finder = nltk.collocations.BigramCollocationFinder.from_words(s)
+
+    #Find 10 best collocations
+    l = finder.nbest(bigram_measures.pmi, 10)
+    d.update(dict([(collocation,True) for collocation in l]))
+
+    return d
DaBuzz • 63 weeks ago