User:Decimus Schomer/Scripts/Grammar analyser
From The Schommunity Wiki
Decimus' user page | Decimus' talk page | Decimus' scripts | Decimus' script libraries | Decimus' projects
Main scripts page | Toggling Rotate script | UUID-getter scripts | Texture changer | Channel spier | Chatbot | Jump slab | Emailer | Fractal viewer | Grammar analyser | SPD viewer
About
This script, written by Decimus, is a basic grammatical analyser - it analyses the structure of anything said to it.
If someone could add a smiley filter to it, that would be appreciated :D
The Python version
This version asks you for text on a command line. To exit the program, and to produce a file called analyse.dump containing information on what it has extracted from what you said, type '_quit' at the prompt.
# Grammar analyser - analyse the grammatical composition of sentences
# Copyright (C) 2007 Decimus Schomer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# (you can find version 2 of the GPL at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html)
# Data storage
# groups: a list of word groups. Each group is a list of words, and a word's
# group number is the index of its group in this list (see get_group).
groups = []
# grammars: a list of sentence patterns. Each pattern is a list of group
# numbers, one per word of the sentence it was built from.
grammars = []
# Get the group (index in the groups list) which contains word
# If word is not in any group, make a new one to contain it
def get_group(word):
    """Return the index of the group holding *word*, creating one if needed.

    The existing groups are searched in order; the first group that contains
    the word wins.  A word that is not found gets a fresh single-word group
    appended to ``groups``, and the index of that new group is returned.
    """
    for index, members in enumerate(groups):
        if word in members:
            return index
    # Unknown word: it starts a group of its own at the end of the list
    groups.append([word])
    return len(groups) - 1
# Merge the groups i1 and i2 into a single group
def _merge_groups(i1, i2):
    """Merge two word groups and re-index every grammar to match.

    The group with the higher index is folded into the group with the lower
    index and then removed from ``groups``.  Every grammar is rewritten so
    that references to the removed group point at the surviving one, and
    references to groups that came after the removed one are shifted down by
    one.  Grammars that become identical are collapsed to a single entry,
    keeping the order in which they were first seen.

    The arguments may be given in either order.  Merging a group with itself
    does nothing.
    """
    global grammars
    # Without this guard the group would be doubled and then deleted outright
    if i1 == i2:
        return
    # Ensure that i2 is the larger number
    if i1 > i2:
        i1, i2 = i2, i1
    # Move the words of the second group into the first, then drop the second
    groups[i1] += groups[i2]
    del groups[i2]
    seen = set()
    merged = []
    for g in grammars:
        for pos, ref in enumerate(g):
            if ref == i2:
                # The deleted group now lives in the merged group
                g[pos] = i1
            elif ref > i2:
                # Groups after the deleted one have all moved down a slot
                g[pos] = ref - 1
        # Drop duplicates, but keep first-seen order so that the output is
        # deterministic (a plain set would scramble it)
        key = tuple(g)
        if key not in seen:
            seen.add(key)
            merged.append(g)
    grammars = merged
# Merge any groups with identical or near-identical usage
def group_merge(min_count=5):
    """Merge word groups that are used interchangeably in the grammars.

    Every pair of equal-length grammars is compared.  When the two differ in
    exactly one position, the pair of group numbers found at that position is
    counted.  Any pair of groups counted at least *min_count* times is then
    merged with ``_merge_groups``.

    min_count -- how many single-position differences a pair of groups needs
                 before it is merged (defaults to 5)
    """
    num_grams = len(grammars)
    # Each entry is [group in first grammar, group in second grammar, count]
    diffs = []
    # Run through each discrete pair of grammars
    for x in range(num_grams - 1):
        gx = grammars[x]
        for y in range(x + 1, num_grams):
            gy = grammars[y]
            # Only work with pairs of same-length grammars
            if len(gx) != len(gy):
                continue
            # Only a *single* difference is of interest
            mismatches = [z for z in range(len(gx)) if gx[z] != gy[z]]
            if len(mismatches) != 1:
                continue
            idx = mismatches[0]
            # Bump the count of a matching difference, or start a new one
            for df in diffs:
                if df[0] == gx[idx] and df[1] == gy[idx]:
                    df[2] += 1
                    break
            else:
                diffs.append([gx[idx], gy[idx], 1])
    # Mergers already performed, as (deleted group, surviving group) pairs
    done = []
    for g1, g2, n in diffs:
        if n < min_count:
            continue
        # The recorded numbers predate the earlier mergers, so replay those
        # mergers on them - a group is deleted by each one
        for dropped, kept in done:
            if g1 == dropped:
                g1 = kept
            elif g1 > dropped:
                g1 -= 1
            if g2 == dropped:
                g2 = kept
            elif g2 > dropped:
                g2 -= 1
        # Earlier mergers may already have united the two groups
        if g1 == g2:
            continue
        # _merge_groups always keeps the lower index and deletes the higher
        # one, so that is what has to be recorded, whichever way round the
        # difference was found
        keep, drop = min(g1, g2), max(g1, g2)
        _merge_groups(keep, drop)
        done.append((drop, keep))
# Main loop
# raw_input only exists on Python 2; on Python 3 the same job is done by input
try:
    read_line = raw_input
except NameError:
    read_line = input

while True:
    try:
        s = read_line("Enter sentence(s): ").strip()
    except EOFError:
        # End of input: treat it like '_quit' so the dump is still written
        break
    # Take the lowercase version of the string and remove unimportant punctuation
    st = s.lower().replace(",", "")
    # Commands
    if st == "_quit":
        break
    # Split the string into sentences: '.', '!' and '?' all end a sentence
    sentences = st.replace("!", ".").replace("?", ".").split(".")
    # Handle each sentence separately
    for sntnc in sentences:
        words = sntnc.split()
        # Nothing between two terminators: there is no grammar to record
        if not words:
            continue
        # Replace each word by the group which the word is in
        wo = [get_group(word) for word in words]
        # If the sentence has a new grammar, add it to the grammar list
        if wo not in grammars:
            grammars.append(wo)
    # Perform any possible group merging
    group_merge()

# Dump the group and grammar lists in a human-readable(-ish) form
with open("analyse.dump", "w") as f:
    f.write("Groups:\n=======\n\n")
    for i, group in enumerate(groups):
        f.write(("%2d: " % i) + ", ".join(group) + "\n")
    f.write("\nGrammars:\n=========\n\n")
    for i, grammar in enumerate(grammars):
        f.write(("%2d: " % i) + " ".join(("%2d" % n) for n in grammar) + "\n")
The LSL version
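This version analyses any text said near it on chat channel 2. Saying LIST on channel 1 makes it report its current groups and grammars, and saying MERGE on channel 1 runs the group merger.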
NOTE: THIS SHOULD WORK, BUT AS OF YET IT IS UNTESTED (AND, AS OF THE LATEST TEST, THE MERGER FUNCTION DOESN'T WORK)
- It doesn't seem to. - Katharine Berry 20:57, 18 July 2007 (BST)
- It seems that the problem is in set_add_int_list(). I'm going to have a look at what it is specifically now.
- Found the problem - you forgot to copy the mda_val lists back. I've fixed it here.
- Thanks. (appropriately enough, I just came to do exactly that - I also noticed that :p) --Decimus
- Found the problem - you forgot to copy the mda_val lists back. I've fixed it here.
- It seems that the problem is in set_add_int_list(). I'm going to have a look at what it is specifically now.
// Grammar analyser - analyse the grammatical composition of sentences
// Copyright (C) 2007 Decimus Schomer
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
// (you can find version 2 of the GPL at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html)
//////////////////////////////////////////////////////////////////////
// Multi-dimensional array library
// Scratch storage used by every mda_* and set_* function below.
// A two-dimensional array is kept flattened: mda_val holds the cells of all
// rows back to back, and mda_val_lens holds the number of cells in each row.
// Callers copy their own pair of lists in, call the library, and copy back.
list mda_val_lens;
list mda_val;
// Return cell x of row y of the scratch array as a string.
// Returns "" when the row or the cell does not exist.
string mda_get_string(integer x, integer y)
{
    if ((y < 0) || (y >= llGetListLength(mda_val_lens)))
        return "";
    if ((x < 0) || (x >= llList2Integer(mda_val_lens, y)))
        return "";
    // Flat index of the first cell of row y = total size of the rows before it.
    // This is summed by hand because llList2List(l, 0, y-1) with y == 0 means
    // (0, -1), which selects the WHOLE list instead of nothing.
    integer idx = 0;
    integer i;
    for (i = 0; i < y; ++i)
        idx += llList2Integer(mda_val_lens, i);
    return llList2String(mda_val, idx + x);
}
// Return cell x of row y of the scratch array as an integer.
// Returns -1 when the row or the cell does not exist.
integer mda_get_integer(integer x, integer y)
{
    if ((y < 0) || (y >= llGetListLength(mda_val_lens)))
        return -1;
    if ((x < 0) || (x >= llList2Integer(mda_val_lens, y)))
        return -1;
    // Flat index of the first cell of row y = total size of the rows before it.
    // This is summed by hand because llList2List(l, 0, y-1) with y == 0 means
    // (0, -1), which selects the WHOLE list instead of nothing.
    integer idx = 0;
    integer i;
    for (i = 0; i < y; ++i)
        idx += llList2Integer(mda_val_lens, i);
    return llList2Integer(mda_val, idx + x);
}
// Length query for the scratch array:
// idx == -1 gives the number of rows, any other idx gives the length of that row.
integer mda_len(integer idx)
{
    if (idx != -1)
        return llList2Integer(mda_val_lens, idx);
    return llGetListLength(mda_val_lens);
}
// Append the string val to the end of row y of the scratch array.
mda_append_string(integer y, string val)
{
    // Total size of rows 0..y, i.e. the flat position just past the end of row y
    list lens_upto_row = llList2List(mda_val_lens, 0, y);
    integer insert_at = llRound(llListStatistics(LIST_STAT_SUM, lens_upto_row));
    integer grown_len = llList2Integer(mda_val_lens, y) + 1;
    mda_val = llListInsertList(mda_val, [val], insert_at);
    mda_val_lens = llListReplaceList(mda_val_lens, [grown_len], y, y);
}
// Append the integer val to the end of row y of the scratch array.
mda_append_integer(integer y, integer val)
{
    // Total size of rows 0..y, i.e. the flat position just past the end of row y
    list lens_upto_row = llList2List(mda_val_lens, 0, y);
    integer insert_at = llRound(llListStatistics(LIST_STAT_SUM, lens_upto_row));
    integer grown_len = llList2Integer(mda_val_lens, y) + 1;
    mda_val = llListInsertList(mda_val, [val], insert_at);
    mda_val_lens = llListReplaceList(mda_val_lens, [grown_len], y, y);
}
// Add a new row to the end of the scratch array; the row holds the single string s.
mda_append_row_str(string s)
{
    mda_val += [s];
    mda_val_lens += [1];
}
//////////////////////////////////////////////////////////////////////
// Set library
// Add val to the scratch array as a new row, unless an identical row is
// already present. Cells are compared by their string form.
set_add_list(list val)
{
    integer lenv = llGetListLength(val);
    integer rows = llGetListLength(mda_val_lens);
    integer start = 0;  // flat index of the first cell of the current row
    integer row;
    integer len;
    integer i;
    integer same;
    for (row = 0; row < rows; ++row)
    {
        len = llList2Integer(mda_val_lens, row);
        // Only a row of the same length can be a duplicate
        if (len == lenv)
        {
            // Compare the cells in place (no slice is copied) and stop at the
            // first mismatch
            same = TRUE;
            for (i = 0; (i < len) && same; ++i)
            {
                if (llList2String(mda_val, start + i) != llList2String(val, i))
                    same = FALSE;
            }
            if (same)
                return;
        }
        start += len;
    }
    mda_val += val;
    mda_val_lens += [lenv];
}
// Remove duplicate rows from the scratch array. The array is emptied and
// rebuilt by feeding its old rows, in order, through set_add_list.
mda_to_set()
{
    list orig_lens = mda_val_lens;
    list orig = mda_val;
    integer rows = llGetListLength(orig_lens);
    integer start = 0;  // flat index of the first cell of the current row
    integer row;
    integer len;
    mda_val_lens = [];
    mda_val = [];
    for (row = 0; row < rows; ++row)
    {
        len = llList2Integer(orig_lens, row);
        // llList2List is inclusive at both ends, so the row is
        // start .. start+len-1. An empty row is handled separately because
        // an end index below the start index selects the cells OUTSIDE the range.
        if (len > 0)
            set_add_list(llList2List(orig, start, start + len - 1));
        else
            set_add_list([]);
        start += len;
    }
}
//////////////////////////////////////////////////////////////////////
// Main script
// Word groups, flattened: groups holds the words of all groups back to back
// and group_lens holds the number of words in each group.
list group_lens = [];
list groups = [];
// Grammars, flattened the same way: grammars holds the group numbers of all
// grammars back to back and grammar_lens holds the length of each grammar.
list grammar_lens = [];
list grammars = [];
// Return the number of the group that contains word.
// If no group contains it, a new single-word group is added (and announced
// on channel 0) and the number of that new group is returned.
// Works on groups/group_lens directly; the mda scratch lists are not touched.
integer get_group(string word)
{
    integer num_groups = llGetListLength(group_lens);
    // One search over the flattened word list replaces a per-cell lookup
    integer pos = llListFindList(groups, [word]);
    if (pos != -1)
    {
        // Walk the group lengths to find the group that owns flat index pos
        integer end = 0;
        integer g;
        for (g = 0; g < num_groups; g++)
        {
            end += llList2Integer(group_lens, g);
            if (pos < end)
                return g;
        }
    }
    llWhisper(0, "Adding new group (" + (string)num_groups + ") containing '" + word + "'");
    groups += [word];
    group_lens += [1];
    return num_groups;
}
// Split s into sentences at '.', '!' and '?'.
// Commas and semicolons are dropped, spaces at the start of a sentence are
// skipped, and sentences that end up empty are not returned.
list split_sentences(string s)
{
    list sentences = [];
    string current = "";
    integer total = llStringLength(s);
    integer pos;
    string ch;
    for (pos = 0; pos < total; pos++)
    {
        ch = llGetSubString(s, pos, pos);
        if ((ch == ".") || (ch == "!") || (ch == "?"))
        {
            // Sentence terminator: emit what has been collected, if anything
            if (current != "")
            {
                sentences += [current];
                current = "";
            }
        }
        else if ((ch != ",") && (ch != ";") && ((ch != " ") || (current != "")))
            current += ch;
    }
    // Text after the last terminator is a sentence too
    if (current != "")
        sentences += [current];
    return sentences;
}
// Split s into words at spaces. Runs of spaces count as one separator, so
// no empty words are returned.
list split_words(string s)
{
    // llParseString2List removes the separators and discards empty entries,
    // which is exactly what a character-by-character scan would produce
    return llParseString2List(s, [" "], []);
}
// Merge the word groups g1 and g2 (given in either order).
// The higher-numbered group is folded into the lower-numbered one and
// removed; every grammar is re-indexed to match and duplicate grammars are
// then removed with mda_to_set. Does nothing when both numbers are the same
// or when either is not an existing group.
_merge_groups(integer g1, integer g2)
{
    integer num_groups = llGetListLength(group_lens);
    integer i;
    // Ensure that g2 is the larger number
    if (g1 > g2)
    {
        integer tmp = g1;
        g1 = g2;
        g2 = tmp;
    }
    if ((g1 == g2) || (g1 < 0) || (g2 >= num_groups))
        return;

    integer len1 = llList2Integer(group_lens, g1);
    integer len2 = llList2Integer(group_lens, g2);
    // end1: flat index just past the last word of g1
    integer end1 = 0;
    for (i = 0; i <= g1; i++)
        end1 += llList2Integer(group_lens, i);
    // start2: flat index of the first word of g2
    integer start2 = end1;
    for (i = g1 + 1; i < g2; i++)
        start2 += llList2Integer(group_lens, i);

    // Move the words of g2 so that they follow the words of g1.
    // Cutting first is safe: end1 <= start2, so end1 is not shifted by the cut.
    // Slices are only taken when non-empty, because a list range whose end
    // lies before its start selects the elements OUTSIDE the range.
    if (len2 > 0)
    {
        list moved = llList2List(groups, start2, start2 + len2 - 1);
        // Replacing a range with an empty list deletes that range
        groups = llListReplaceList(groups, [], start2, start2 + len2 - 1);
        groups = llListInsertList(groups, moved, end1);
    }
    group_lens = llListReplaceList(group_lens, [len1 + len2], g1, g1);
    group_lens = llListReplaceList(group_lens, [], g2, g2);

    // Re-index the grammars. Every cell of the flattened list is a group
    // number, so the row boundaries do not matter here.
    integer cells = llGetListLength(grammars);
    integer n;
    for (i = 0; i < cells; i++)
    {
        n = llList2Integer(grammars, i);
        // Lists are values: the result of llListReplaceList has to be stored
        if (n == g2)
            grammars = llListReplaceList(grammars, [g1], i, i);
        else if (n > g2)
            grammars = llListReplaceList(grammars, [n - 1], i, i);
    }

    // Re-indexing can make grammars identical; drop the duplicates
    mda_val = grammars;
    mda_val_lens = grammar_lens;
    mda_to_set();
    grammars = mda_val;
    grammar_lens = mda_val_lens;
}
// Merge any groups with identical or near-identical usage.
// Every pair of equal-length grammars is compared; when the two differ in
// exactly one position, the pair of group numbers at that position is
// counted. Pairs counted at least 5 times are merged with _merge_groups.
group_merge()
{
    // Strided list, three entries per difference: [groupA, groupB, count]
    list diffs = [];
    // Strided list, two entries per merger performed: [deleted, surviving]
    list done = [];
    integer num_grams = llGetListLength(grammar_lens);
    integer num_diffs;
    integer x;
    integer y;
    integer z;
    integer d;
    integer startx = 0;  // flat index of the first cell of grammar x
    integer starty;      // flat index of the first cell of grammar y
    integer lenx;
    integer leny;
    integer ndiff;       // differing positions seen so far (counting stops at 2)
    integer pos;         // position of the first differing cell
    integer a;
    integer b;
    integer found;
    integer deleted;
    integer kept;

    // Run through each discrete pair of grammars
    for (x = 0; x < num_grams - 1; x++)
    {
        lenx = llList2Integer(grammar_lens, x);
        starty = startx + lenx;
        for (y = x + 1; y < num_grams; y++)
        {
            leny = llList2Integer(grammar_lens, y);
            // Only work with pairs of same-length grammars
            if (lenx == leny)
            {
                ndiff = 0;
                pos = 0;
                for (z = 0; (z < lenx) && (ndiff < 2); z++)
                {
                    if (llList2Integer(grammars, startx + z) != llList2Integer(grammars, starty + z))
                    {
                        if (ndiff == 0)
                            pos = z;
                        ++ndiff;
                    }
                }
                // Exactly one difference: count it
                if (ndiff == 1)
                {
                    a = llList2Integer(grammars, startx + pos);
                    b = llList2Integer(grammars, starty + pos);
                    found = FALSE;
                    num_diffs = llGetListLength(diffs) / 3;
                    for (d = 0; (d < num_diffs) && (! found); d++)
                    {
                        if ((llList2Integer(diffs, 3*d) == a) &&
                            (llList2Integer(diffs, (3*d)+1) == b))
                        {
                            // Lists are values: the result has to be stored
                            diffs = llListReplaceList(diffs, [llList2Integer(diffs, (3*d)+2) + 1], (3*d)+2, (3*d)+2);
                            found = TRUE;
                        }
                    }
                    if (! found)
                        diffs += [a, b, 1];
                }
            }
            starty += leny;
        }
        // Advance by one grammar only; starty has run on to the end of the list
        startx += lenx;
    }

    // Run through each difference with a count of at least 5
    // and merge the groups in it
    num_diffs = llGetListLength(diffs) / 3;
    for (d = 0; d < num_diffs; d++)
    {
        if (llList2Integer(diffs, (3*d)+2) >= 5)
        {
            a = llList2Integer(diffs, 3*d);
            b = llList2Integer(diffs, (3*d)+1);
            // The recorded numbers predate the earlier mergers, so replay
            // those mergers on them - a group is deleted by each one
            for (x = 0; x < (llGetListLength(done) / 2); x++)
            {
                deleted = llList2Integer(done, 2*x);
                kept = llList2Integer(done, (2*x)+1);
                if (a == deleted)
                    a = kept;
                else if (a > deleted)
                    a -= 1;
                if (b == deleted)
                    b = kept;
                else if (b > deleted)
                    b -= 1;
            }
            // Earlier mergers may already have united the two groups
            if (a != b)
            {
                // _merge_groups keeps the lower number and deletes the higher
                // one, so order the pair that way before recording it
                if (a > b)
                {
                    deleted = a;
                    a = b;
                    b = deleted;
                }
                _merge_groups(a, b);
                done += [b, a];
            }
        }
    }
}
// The only state of the script.
// Channel 2 carries the text to analyse; channel 1 carries the commands
// LIST (say the groups and grammars on channel 0) and MERGE (run group_merge).
default
{
    state_entry()
    {
        llListen(2, "", "", "");
        llListen(1, "", "", "LIST");
        llListen(1, "", "", "MERGE");
    }
    listen(integer channel, string name, key id, string msg)
    {
        integer row;
        integer first;
        integer past;
        if (channel == 1)
        {
            if (msg == "MERGE")
                group_merge();
            else if (msg == "LIST")
            {
                // Groups: one line per group, the words comma-separated
                mda_val = groups;
                mda_val_lens = group_lens;
                first = 0;
                past = 0;
                llSay(0, "Groups:");
                for (row = 0; row < llGetListLength(mda_val_lens); row++)
                {
                    past += llList2Integer(mda_val_lens, row);
                    llSay(0, "(" + (string)row + ") " + llList2CSV(llList2List(mda_val, first, past-1)));
                    first = past;
                }
                // Grammars: one line per grammar, the group numbers comma-separated
                mda_val = grammars;
                mda_val_lens = grammar_lens;
                first = 0;
                past = 0;
                llSay(0, "Grammars:");
                for (row = 0; row < llGetListLength(mda_val_lens); row++)
                {
                    past += llList2Integer(mda_val_lens, row);
                    llSay(0, "(" + (string)row + ") " + llList2CSV(llList2List(mda_val, first, past-1)));
                    first = past;
                }
            }
            return;
        }
        // Anything else is text to analyse, one sentence at a time
        list sentences = split_sentences(llToLower(msg));
        integer num_sentences = llGetListLength(sentences);
        list words;
        integer num_words;
        list wo;
        integer si;
        integer wi;
        for (si = 0; si < num_sentences; si++)
        {
            // Turn the sentence into its list of group numbers
            wo = [];
            words = split_words(llList2String(sentences, si));
            num_words = llGetListLength(words);
            for (wi = 0; wi < num_words; wi++)
                wo += [get_group(llList2String(words, wi))];
            // Record that list as a grammar unless it is already known
            mda_val_lens = grammar_lens;
            mda_val = grammars;
            set_add_list(wo);
            grammar_lens = mda_val_lens;
            grammars = mda_val;
        }
    }
}