Added collocation feature extraction
2 files changed, 11 lines added, 1 line removed
Changes
--- analysis/sentiment/learner.py da4e5bcbdf66035be8f121754d4327886241e463
+++ analysis/sentiment/learner.py d3794e54a0bb48f5e87bc083d4cc928583c855a3
@@ -41,4 +41,14 @@
- return dict([(word,True) for word in word_bag])
+ d = dict([(word,True) for word in word_bag])
+
+ # Set up the bigram association measures and a collocation finder over s
+ bigram_measures = nltk.collocations.BigramAssocMeasures()
+ finder = nltk.collocations.BigramCollocationFinder.from_words(s)
+
+ # Keep the 10 highest-scoring bigrams, ranked by pointwise mutual information (PMI)
+ l = finder.nbest(bigram_measures.pmi, 10)
+ d.update(dict([(collocation,True) for collocation in l]))
+
+ return d