Count # of occurences of all words in an MS Word document

E

Elizabeth

I need a tool which will count the number of occurrences of ALL words in a
Microsoft Word document; the output would be something like this:

tree 4
grows 3
city 6
ecology 2
the 78

etc ....

Anybody know of anything readily available which will do this ?

Thanks .......
 
G

Greg Maxey

Elizabeth:

The following macro works nicely:

Sub WordFrequency()

Dim SingleWord As String 'Raw word pulled from doc
Const maxwords = 9000 'Maximum unique words allowed
Dim Words(maxwords) As String 'Array to hold unique words
Dim Freq(maxwords) As Integer 'Frequency counter for Unique Words
Dim WordNum As Integer 'Number of unique words
Dim ByFreq As Boolean 'Flag for sorting order
Dim ttlwds As Long 'Total words in the document
Dim Excludes As String 'Words to be excluded
Dim Found As Boolean 'Temporary flag
Dim j, k, l, Temp As Integer 'Temporary variables
Dim IngWordCount As Long 'Total non-excluded words in document
Dim NonWordObjects As Long
Dim AllWordOjects As Long
Dim TotalWords As Long
Dim tword As String '

'Set up excluded words
'Excludes = "[pickleloaf][gruntbutter]"
'Excludes = Excludes & InputBox$("The following words are excluded by
default: " & Excludes & ". Enter additional words that you wish to exclude,
surrounding each word with [ ].", "Excluded Words", "")
Excludes = InputBox$("Enter words that you wish to exclude. Place each word
within square brackets [ ]. Example: [is][a].", "Excluded Words", "")

'Find out how to sort
ByFreq = True
Ans = InputBox$("Default sort order is word freqeuncy. To sort
alphabetically by word, type Word in the field below.", "Sort order",
"FREQ")
If Ans = "" Then End
If UCase(Ans) = "WORD" Then
ByFreq = False
End If

Selection.HomeKey Unit:=wdStory
System.Cursor = wdCursorWait
WordNum = 0
ttlwds = ActiveDocument.Words.Count
'AllWordObjects = ActiveDocument.Words.Count
'TotalWords = NonWordObjects

'Control the repeat
For Each aword In ActiveDocument.Words
SingleWord = Trim(LCase(aword))
If SingleWord < "a" Or SingleWord > "z" Then SingleWord = "" 'Out of range?
If SingleWord < "a" Or SingleWord > "z" Then NonWordObjects = NonWordObjects
+ 1
'SingleWord = Trim(aword)
'If SingleWord < "A" Or SingleWord > "z" Then SingleWord = "" 'Out of range?
If InStr(Excludes, "[" & SingleWord & "]") Then SingleWord = "" 'On exclude
list?
If Len(SingleWord) > 0 Then
IngWordCount = IngWordCount + 1
Found = False
For j = 1 To WordNum
If Words(j) = SingleWord Then
Freq(j) = Freq(j) + 1
Found = True
Exit For
End If
Next j
If Not Found Then
WordNum = WordNum + 1
Words(WordNum) = SingleWord
Freq(WordNum) = 1
End If
If WordNum > maxwords - 1 Then
j = MsgBox("The maximum array size has been exceeded. Increase
maxwords.", vbOKOnly)
Exit For
End If
End If
ttlwds = ttlwds - 1
StatusBar = "Remaining: " & ttlwds & " Unique: " & WordNum
Next aword

'Now sort it into word order
For j = 1 To WordNum - 1
k = j
For l = j + 1 To WordNum
If (Not ByFreq And Words(l) < Words(k)) Or (ByFreq And Freq(l) > Freq(k))
Then k = l
Next l
If k <> j Then
tword = Words(j)
Words(j) = Words(k)
Words(k) = tword
Temp = Freq(j)
Freq(j) = Freq(k)
Freq(k) = Temp
End If
StatusBar = "Sorting: " & WordNum - j
Next j

AllWordObjects = ActiveDocument.Words.Count
NonWordObjects = NonWordObjects
TotalWords = AllWordObjects - NonWordObjects

'Now write out the results
tmpName = ActiveDocument.AttachedTemplate.FullName
Documents.Add Template:=tmpName, NewTemplate:=False
Selection.ParagraphFormat.TabStops.ClearAll
With Selection
For j = 1 To WordNum
..TypeText Text:=Words(j) & vbTab & Trim(Str(Freq(j))) & vbCrLf
Next j
End With
ActiveDocument.Range.Select
Selection.ConvertToTable
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows.Add BeforeRow:=Selection.Rows(1)
ActiveDocument.Tables(1).Cell(1, 1).Range.InsertBefore "Unique Words"
ActiveDocument.Tables(1).Cell(1, 2).Range.InsertBefore "Number of
Occurrences"
ActiveDocument.Tables(1).Columns(2).Select
Selection.ParagraphFormat.Alignment = wdAlignParagraphRight
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows(1).Shading.BackgroundPatternColor =
wdColorGray20
ActiveDocument.Tables(1).Columns(1).PreferredWidth = InchesToPoints(4.75)
ActiveDocument.Tables(1).Columns(2).PreferredWidth = InchesToPoints(1.9)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Summary"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore "Total"
ActiveDocument.Tables(1).Rows(ActiveDocument.Tables(1).Rows.Count).Shading.BackgroundPatternColor
= wdColorGray20


ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Number of Unique Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore Trim(Str(WordNum))
ActiveDocument.Tables(1).Rows(ActiveDocument.Tables(1).Rows.Count).Shading.BackgroundPatternColor
= wdColorAutomatic

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Number of Non-Excluded Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore (IngWordCount)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Number of Words (Excluded and Non-Excluded) in
Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore (TotalWords)
System.Cursor = wdCursorNormal

MsgBox "This document contains " & Trim(Str(WordNum)) & " unique words. "
MsgBox "This document contains " & IngWordCount & " non-excluded words. "
MsgBox "This document contains a total of " & TotalWords & " (excluded and
non-excluded) words. "
MsgBox "For more statistics on this document, use Tools>Word Count in the
original document. "

Selection.HomeKey wdStory

End Sub
 
E

Elizabeth

Hi Greg ..

Greg Maxey said:
Elizabeth:

The following macro works nicely:

WOW !! that was fast; I didn't know if I picked the right group or if I
would ever get a reply ... I thought maybe this was one of those
old-fashioned computer science exercises that nobody bothered with anymore

Thanks a bunch; it does exactly what I want it to ... at the right price too
! Even runs fast ...

Liz



Sub WordFrequency()

Dim SingleWord As String 'Raw word pulled from doc
Const maxwords = 9000 'Maximum unique words allowed
Dim Words(maxwords) As String 'Array to hold unique words
Dim Freq(maxwords) As Integer 'Frequency counter for Unique Words
Dim WordNum As Integer 'Number of unique words
Dim ByFreq As Boolean 'Flag for sorting order
Dim ttlwds As Long 'Total words in the document
Dim Excludes As String 'Words to be excluded
Dim Found As Boolean 'Temporary flag
Dim j, k, l, Temp As Integer 'Temporary variables
Dim IngWordCount As Long 'Total non-excluded words in document
Dim NonWordObjects As Long
Dim AllWordOjects As Long
Dim TotalWords As Long
Dim tword As String '

'Set up excluded words
'Excludes = "[pickleloaf][gruntbutter]"
'Excludes = Excludes & InputBox$("The following words are excluded by
default: " & Excludes & ". Enter additional words that you wish to exclude,
surrounding each word with [ ].", "Excluded Words", "")
Excludes = InputBox$("Enter words that you wish to exclude. Place each word
within square brackets [ ]. Example: [is][a].", "Excluded Words", "")

'Find out how to sort
ByFreq = True
Ans = InputBox$("Default sort order is word freqeuncy. To sort
alphabetically by word, type Word in the field below.", "Sort order",
"FREQ")
If Ans = "" Then End
If UCase(Ans) = "WORD" Then
ByFreq = False
End If

Selection.HomeKey Unit:=wdStory
System.Cursor = wdCursorWait
WordNum = 0
ttlwds = ActiveDocument.Words.Count
'AllWordObjects = ActiveDocument.Words.Count
'TotalWords = NonWordObjects

'Control the repeat
For Each aword In ActiveDocument.Words
SingleWord = Trim(LCase(aword))
If SingleWord < "a" Or SingleWord > "z" Then SingleWord = "" 'Out of range?
If SingleWord < "a" Or SingleWord > "z" Then NonWordObjects = NonWordObjects
+ 1
'SingleWord = Trim(aword)
'If SingleWord < "A" Or SingleWord > "z" Then SingleWord = "" 'Out of range?
If InStr(Excludes, "[" & SingleWord & "]") Then SingleWord = "" 'On exclude
list?
If Len(SingleWord) > 0 Then
IngWordCount = IngWordCount + 1
Found = False
For j = 1 To WordNum
If Words(j) = SingleWord Then
Freq(j) = Freq(j) + 1
Found = True
Exit For
End If
Next j
If Not Found Then
WordNum = WordNum + 1
Words(WordNum) = SingleWord
Freq(WordNum) = 1
End If
If WordNum > maxwords - 1 Then
j = MsgBox("The maximum array size has been exceeded. Increase
maxwords.", vbOKOnly)
Exit For
End If
End If
ttlwds = ttlwds - 1
StatusBar = "Remaining: " & ttlwds & " Unique: " & WordNum
Next aword

'Now sort it into word order
For j = 1 To WordNum - 1
k = j
For l = j + 1 To WordNum
If (Not ByFreq And Words(l) < Words(k)) Or (ByFreq And Freq(l) > Freq(k))
Then k = l
Next l
If k <> j Then
tword = Words(j)
Words(j) = Words(k)
Words(k) = tword
Temp = Freq(j)
Freq(j) = Freq(k)
Freq(k) = Temp
End If
StatusBar = "Sorting: " & WordNum - j
Next j

AllWordObjects = ActiveDocument.Words.Count
NonWordObjects = NonWordObjects
TotalWords = AllWordObjects - NonWordObjects

'Now write out the results
tmpName = ActiveDocument.AttachedTemplate.FullName
Documents.Add Template:=tmpName, NewTemplate:=False
Selection.ParagraphFormat.TabStops.ClearAll
With Selection
For j = 1 To WordNum
.TypeText Text:=Words(j) & vbTab & Trim(Str(Freq(j))) & vbCrLf
Next j
End With
ActiveDocument.Range.Select
Selection.ConvertToTable
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows.Add BeforeRow:=Selection.Rows(1)
ActiveDocument.Tables(1).Cell(1, 1).Range.InsertBefore "Unique Words"
ActiveDocument.Tables(1).Cell(1, 2).Range.InsertBefore "Number of
Occurrences"
ActiveDocument.Tables(1).Columns(2).Select
Selection.ParagraphFormat.Alignment = wdAlignParagraphRight
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows(1).Shading.BackgroundPatternColor =
wdColorGray20
ActiveDocument.Tables(1).Columns(1).PreferredWidth = InchesToPoints(4.75)
ActiveDocument.Tables(1).Columns(2).PreferredWidth = InchesToPoints(1.9)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Summary"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore "Total"
ActiveDocument.Tables(1).Rows(ActiveDocument.Tables(1).Rows.Count).Shading.B
ackgroundPatternColor
= wdColorGray20


ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Number of Unique Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore Trim(Str(WordNum))
ActiveDocument.Tables(1).Rows(ActiveDocument.Tables(1).Rows.Count).Shading.B
ackgroundPatternColor
= wdColorAutomatic

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Number of Non-Excluded Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore (IngWordCount)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
1).Range.InsertBefore "Number of Words (Excluded and Non-Excluded) in
Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Tables(1).Rows.Count,
2).Range.InsertBefore (TotalWords)
System.Cursor = wdCursorNormal

MsgBox "This document contains " & Trim(Str(WordNum)) & " unique words. "
MsgBox "This document contains " & IngWordCount & " non-excluded words. "
MsgBox "This document contains a total of " & TotalWords & " (excluded and
non-excluded) words. "
MsgBox "For more statistics on this document, use Tools>Word Count in the
original document. "

Selection.HomeKey wdStory

End Sub
--
Greg Maxey/Word MVP
A Peer in Peer to Peer Support
I need a tool which will count the number of occurrences of ALL words
in a Microsoft Word document; the output would be something like this:

tree 4
grows 3
city 6
ecology 2
the 78

etc ....

Anybody know of anything readily available which will do this ?

Thanks .......
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top