User:Decimus Schomer/Scripts/Grammar analyser
About
This script, written by Decimus, is a basic grammatical analyser: it sorts the words of anything said to it into groups, records the structure of each sentence as a sequence of those groups (a 'grammar'), and merges groups whose words keep turning up in the same position of otherwise-identical grammars.
If someone could add a smiley filter to it, that would be appreciated :D
The Python version
This version, written for Python 2, prompts for text on the command line. Type '_quit' at the prompt to exit; on exit it writes a file called analyse.dump describing the groups and grammars it has extracted from what you typed.
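For example (illustrative only), entering the two sentences below and then quitting:

 Enter sentence(s): The cat sat. The dog ran.
 Enter sentence(s): _quit

gives an analyse.dump along these lines - each group lists the words it contains, and each grammar is a sentence pattern written as a sequence of group numbers:

 Groups:
 =======
 
  0: the
  1: cat
  2: sat
  3: dog
  4: ran
 
 Grammars:
 =========
 
  0:  0  1  2
  1:  0  3  4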
# Grammar analyser - analyse the grammatical composition of sentences
# Copyright (C) 2007 Decimus Schomer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# (you can find version 2 of the GPL at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html)

# Data storage
groups = []
grammars = []

# Get the group (index in the groups list) which contains word
# If word is not in any group, make a new one to contain it
def get_group(word):
    for i in range(len(groups)):
        if word in groups[i]:
            return i
    n = len(groups)
    groups.append([word])
    return n

# Merge the groups i1 and i2 into a single group
def _merge_groups(i1, i2):
    global grammars
    # Ensure that i2 is the larger number
    if i1 > i2:
        i1, i2 = i2, i1
    # Add all the items in the second group to the items in the first
    groups[i1] += groups[i2]
    # Delete the second group
    del groups[i2]
    # Ensure that each grammar is correct now the merger has been done
    for x in range(len(grammars)):
        g = grammars[x]
        for y in range(len(g)):
            # Replace all references to the group that was deleted
            # with references to the merged group
            if g[y] == i2:
                g[y] = i1
            # Decrement all references to groups after the deleted one by one.
            # This is due to the deletion of the second group
            elif g[y] > i2:
                g[y] -= 1
    # Delete all the duplicate grammars
    gcopy = set()
    for g in grammars:
        gcopy.add(tuple(g))
    grammars = []
    for g in gcopy:
        grammars.append(list(g))

# Merge any groups with identical or near-identical usage
def group_merge():
    num_grams = len(grammars)
    diffs = []
    # Run through each discrete pair of grammars
    for x in range(num_grams-1):
        gx = grammars[x]
        for y in range(x + 1, num_grams):
            gy = grammars[y]
            # Only work with pairs of same-length grammars
            if len(gx) == len(gy):
                f = 0
                idx = 0
                # Run through each part of the grammars,
                # looking for a *single* difference
                # Multiple differences are discarded
                for z in range(len(gx)):
                    if gx[z] != gy[z]:
                        if f:
                            break
                        f = 1
                        idx = z
                # If we found a single difference:
                # * if a similar difference has already been found,
                #   increment its count
                # * if no similar differences have been found, add a new entry
                else:
                    if f:
                        for df in diffs:
                            if (df[0] == gx[idx]) and (df[1] == gy[idx]):
                                df[2] += 1
                                break
                        else:
                            diffs.append([gx[idx], gy[idx], 1])
    # Run through each difference with a count of at least 5
    # and merge the groups in it
    done = []
    for g1, g2, n in diffs:
        if n >= 5:
            # Correct the entries - groups are deleted at each iteration!
            for i1, i2 in done:
                if g1 == i1:
                    g1 = i2
                elif g1 > i1:
                    g1 -= 1
                if g2 == i1:
                    g2 = i2
                elif g2 > i1:
                    g2 -= 1
            # Perform the merger
            _merge_groups(g1, g2)
            # _merge_groups keeps the lower-numbered group and deletes the other,
            # so record the pair as (deleted group, group it was merged into)
            if g1 > g2:
                g1, g2 = g2, g1
            # Add the group pair to the done list
            done.append((g2, g1))

# Main loop
while 1:
    s = raw_input("Enter sentence(s): ").strip()
    # Take the lowercase version of the string and remove unimportant punctuation
    st = s.lower()
    st = "".join(st.split(","))
    # Commands
    if st == "_quit":
        break
    # Split the string into sentences
    l1 = st.split(".")
    if l1[-1] == "":
        l1 = l1[:-1]
    l2 = []
    l3 = []
    for itm in l1:
        l = itm.split("!")
        if l[-1] == "":
            l = l[:-1]
        l2 += l
    for itm in l2:
        l = itm.split("?")
        if l[-1] == "":
            l = l[:-1]
        l3 += l
    # Handle each sentence separately
    for sntnc in l3:
        wo = []
        # Process each word
        words = sntnc.split()
        for word in words:
            # Append the group which the word is in to the 'wo' list
            wo.append(get_group(word))
        # Run through each grammar, checking if it applies to the sentence
        for g in grammars:
            if len(g) == len(wo):
                for i in range(len(wo)):
                    if wo[i] != g[i]:
                        break
                else:
                    # An existing grammar matches this sentence exactly
                    break
        # If the sentence has a new grammar, add the new grammar to the grammar list
        else:
            grammars.append(wo)
        # Perform any possible group merging
        group_merge()

# Dump the group and grammar lists in a human-readable(-ish) form
f = open("analyse.dump", "w")
f.write("Groups:\n=======\n\n")
for i in range(len(groups)):
    f.write(("%2d: " % i) + ", ".join(word for word in groups[i]) + "\n")
f.write("\nGrammars:\n=========\n\n")
for i in range(len(grammars)):
    f.write(("%2d: " % i) + " ".join(("%2d" % n) for n in grammars[i]) + "\n")
f.close()
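To see the group-merging heuristic in action, here is a quick illustrative sketch (assuming the definitions above have been loaded into an interactive Python 2 session, without running the main loop). Five sentence pairs whose grammars differ in exactly one position push the ('cat', 'dog') difference count to the threshold of 5, so the two groups are merged:

 # Illustrative only: drive get_group()/group_merge() directly,
 # starting from empty 'groups' and 'grammars'
 sentences = [
     "the cat sat",  "the dog sat",
     "a cat ran",    "a dog ran",
     "my cat slept", "my dog slept",
     "that cat ate", "that dog ate",
     "one cat hid",  "one dog hid",
 ]
 for sntnc in sentences:
     wo = [get_group(word) for word in sntnc.split()]
     if wo not in grammars:
         grammars.append(wo)
 group_merge()
 # 'cat' and 'dog' now share a group:
 print([g for g in groups if "cat" in g][0])   # ['cat', 'dog']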
The LSL version
This version analyses any text said near it on channel 2. Saying LIST on channel 1 makes it say its current groups and grammars; saying MERGE on channel 1 forces a group-merging pass.
NOTE: THIS SHOULD WORK, BUT IT HAS BARELY BEEN TESTED (AND, AS OF THE LATEST TEST, THE MERGER FUNCTION DOESN'T WORK)
- It doesn't seem to. - Katharine Berry 20:57, 18 July 2007 (BST)
- It seems that the problem is in set_add_int_list(). I'm going to have a look at what it is specifically now.
- Found the problem - you forgot to copy the mda_val lists back. I've fixed it here.
- Thanks. (appropriately enough, I just came to do exactly that - I also noticed that :p) --Decimus
// Grammar analyser - analyse the grammatical composition of sentences
// Copyright (C) 2007 Decimus Schomer
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
// (you can find version 2 of the GPL at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html)

//////////////////////////////////////////////////////////////////////
// Multi-dimensional array library
//
// Row y occupies llList2Integer(mda_val_lens, y) consecutive entries
// of the flat list mda_val.

list mda_val_lens;
list mda_val;

// Get element x of row y as a string ("" if x is out of range)
string mda_get_string(integer x, integer y)
{
    integer idx = 0;
    if (y > 0)  // llList2List(..., 0, -1) would return the whole list
        idx = llRound(llListStatistics(LIST_STAT_SUM, llList2List(mda_val_lens, 0, y-1)));
    if (x >= llList2Integer(mda_val_lens, y))
        return "";
    return llList2String(mda_val, idx+x);
}

// Get element x of row y as an integer (-1 if x is out of range)
integer mda_get_integer(integer x, integer y)
{
    integer idx = 0;
    if (y > 0)
        idx = llRound(llListStatistics(LIST_STAT_SUM, llList2List(mda_val_lens, 0, y-1)));
    if (x >= llList2Integer(mda_val_lens, y))
        return -1;
    return llList2Integer(mda_val, idx+x);
}

// Length of row idx, or the number of rows if idx == -1
integer mda_len(integer idx)
{
    if (idx == -1)
        return llGetListLength(mda_val_lens);
    return llList2Integer(mda_val_lens, idx);
}

// Append a string to the end of row y
mda_append_string(integer y, string val)
{
    integer idx = llRound(llListStatistics(LIST_STAT_SUM, llList2List(mda_val_lens, 0, y)));
    mda_val = llListInsertList(mda_val, [val], idx);
    mda_val_lens = llListReplaceList(mda_val_lens, [llList2Integer(mda_val_lens, y) + 1], y, y);
}

// Append an integer to the end of row y
mda_append_integer(integer y, integer val)
{
    integer idx = llRound(llListStatistics(LIST_STAT_SUM, llList2List(mda_val_lens, 0, y)));
    mda_val = llListInsertList(mda_val, [val], idx);
    mda_val_lens = llListReplaceList(mda_val_lens, [llList2Integer(mda_val_lens, y) + 1], y, y);
}

// Add a new one-element row containing s
mda_append_row_str(string s)
{
    mda_val_lens = (mda_val_lens=[]) + mda_val_lens + [1];
    mda_val = (mda_val=[]) + mda_val + [s];
}

//////////////////////////////////////////////////////////////////////
// Set library

// Append the row val to the mda, unless an identical row already exists
set_add_list(list val)
{
    integer lenv = llGetListLength(val);
    integer x;
    integer y;
    list l;
    integer s = 0;
    integer d;
    integer mlen = mda_len(-1);
    integer f;
    for (y = 0; y < mlen; ++y)
    {
        d = llList2Integer(mda_val_lens, y);
        if (d == lenv)
        {
            f = 0;
            l = llList2List(mda_val, s, s+d-1);
            for (x = 0; x < d; ++x)
            {
                if (llList2String(l, x) != llList2String(val, x))
                {
                    f = 1;
                }
            }
            if (!f)
                return;
        }
        s += d;
    }
    mda_val += val;
    mda_val_lens += [lenv];
}

// Rebuild the mda with any duplicate rows removed
mda_to_set()
{
    integer len = mda_len(-1);
    list orig_lens = mda_val_lens;
    list orig = mda_val;
    mda_val_lens = [];
    mda_val = [];
    integer i;
    integer s;
    integer d;
    list l;
    for (i = 0; i < len; ++i)
    {
        d = llList2Integer(orig_lens, i);
        l = llList2List(orig, s, s+d-1);
        set_add_list(l);
        s += d;
    }
}

//////////////////////////////////////////////////////////////////////
// Main script

list group_lens = [];
list groups = [];
list grammar_lens = [];
list grammars = [];

// Get the group (row index in 'groups') which contains word.
// If word is not in any group, make a new one to contain it.
integer get_group(string word)
{
    mda_val_lens = group_lens;
    mda_val = groups;
    integer x;
    integer y;
    for (y = 0; y < mda_len(-1); y++)
        for (x = 0; x < mda_len(y); x++)
            if (mda_get_string(x, y) == word)
                return y;
    llWhisper(0, "Adding new group (" + (string)y + ") containing '" + word + "'");
    mda_append_row_str(word);
    groups = mda_val;
    group_lens = mda_val_lens;
    return y;
}

// Split s into sentences, dropping commas, semicolons and leading spaces
list split_sentences(string s)
{
    list l = [];
    string st = "";
    integer i;
    for (i = 0; i < llStringLength(s); i++)
    {
        string c = llGetSubString(s, i, i);
        if (c == "." || c == "!" || c == "?")
        {
            if (st)
            {
                l += [st];
                st = "";
            }
        }
        else if ((c != ",") && (c != ";") && ((c != " ") || (st != "")))
            st += c;
    }
    if (st)
        l += [st];
    return l;
}

// Split a sentence into words
list split_words(string s)
{
    list l = [];
    string st = "";
    integer i;
    for (i = 0; i < llStringLength(s); i++)
    {
        string c = llGetSubString(s, i, i);
        if (c == " ")
        {
            if (st)
            {
                l += [st];
                st = "";
            }
        }
        else
            st += c;
    }
    if (st)
        l += [st];
    return l;
}

// Merge groups g1 and g2 into a single group and fix up the grammars
_merge_groups(integer g1, integer g2)
{
    mda_val = groups;
    mda_val_lens = group_lens;
    integer i;
    integer n;
    integer idx1 = 0;
    integer idx2;
    integer idx3;
    list l;
    // Ensure that g2 is the larger number
    if (g1 > g2)
    {
        integer tmp = g1;
        g1 = g2;
        g2 = tmp;
    }
    for (i = 0; i < g1+1; i++)
        idx1 += llList2Integer(mda_val_lens, i);
    idx2 = idx1;
    for (; i < g2; i++)
        idx2 += llList2Integer(mda_val_lens, i);
    // idx2 is the first element of group g2, idx3 its last element
    idx3 = idx2 + llList2Integer(mda_val_lens, g2) - 1;
    // idx1 is the last element of group g1 (the running total above is one past it)
    --idx1;
    // Move group g2's words so that they sit directly after group g1's words
    mda_val = llList2List(mda_val, 0, idx1)
            + llList2List(mda_val, idx2, idx3)
            + llList2List(mda_val, idx1+1, idx2-1)
            + llList2List(mda_val, idx3+1, llGetListLength(mda_val));
    mda_val_lens = llList2List(mda_val_lens, 0, g1-1)
                 + [llList2Integer(mda_val_lens, g1) + llList2Integer(mda_val_lens, g2)]
                 + llList2List(mda_val_lens, g1+1, g2-1)
                 + llList2List(mda_val_lens, g2+1, mda_len(-1));
    groups = mda_val;
    group_lens = mda_val_lens;
    // Renumber the grammars: g2 becomes g1, and everything after g2 shifts down
    mda_val = grammars;
    mda_val_lens = grammar_lens;
    idx1 = 0;
    idx2 = 0;
    for (i = 0; i < mda_len(-1); i++)
    {
        idx1 += llList2Integer(mda_val_lens, i);
        for (idx3 = idx2; idx3 < idx1; idx3++)
        {
            n = llList2Integer(mda_val, idx3);
            if (n == g2)
                mda_val = llListReplaceList(mda_val, [g1], idx3, idx3);
            else if (n > g2)
                mda_val = llListReplaceList(mda_val, [n-1], idx3, idx3);
        }
        idx2 = idx1;
    }
    // Remove any grammars that have become duplicates
    mda_to_set();
    grammars = mda_val;
    grammar_lens = mda_val_lens;
}

// Merge any groups with identical or near-identical usage:
// if two grammars of the same length differ in exactly one position, count
// that pair of groups; once a pair has been seen five times, merge it
group_merge()
{
    mda_val = grammars;
    mda_val_lens = grammar_lens;
    list diffs = [];
    list done = [];
    integer num_grams = mda_len(-1);
    integer x;
    integer idxx = 0;
    integer y;
    integer idxy;
    integer idxz;
    integer tmp1;
    integer tmp2;
    integer f;
    integer idx;
    list lx;
    list ly;
    for (x = 0; x < num_grams-1; x++)
    {
        tmp1 = llList2Integer(mda_val_lens, x);
        idxy = idxx + tmp1;
        lx = llList2List(mda_val, idxx, idxy-1);
        for (y = x + 1; y < num_grams; y++)
        {
            tmp2 = llList2Integer(mda_val_lens, y);
            // Only compare same-length grammars
            if (tmp1 == tmp2)
            {
                ly = llList2List(mda_val, idxy, idxy+tmp2-1);
                // f ends up 0 for no differences, 1 for exactly one, 2 for more
                f = 0;
                idx = 0;
                for (idxz = 0; (idxz < tmp1) && (f != 2); idxz++)
                    if (llList2Integer(lx, idxz) != llList2Integer(ly, idxz))
                    {
                        if (f)
                            f = 2;
                        else
                        {
                            f = 1;
                            idx = idxz;
                        }
                    }
                if (f == 1)
                {
                    // Count this pair of differing groups in 'diffs'
                    // (stored as flat triples: group A, group B, count)
                    f = 0;
                    for (idxz = 0; (idxz < (llGetListLength(diffs) / 3)) && (!f); idxz++)
                    {
                        if ((llList2Integer(diffs, 3*idxz) == llList2Integer(lx, idx))
                         && (llList2Integer(diffs, (3*idxz)+1) == llList2Integer(ly, idx)))
                        {
                            diffs = llListReplaceList(diffs, [llList2Integer(diffs, (3*idxz)+2)+1], (3*idxz)+2, (3*idxz)+2);
                            f = 1;
                        }
                    }
                    if (!f)
                        diffs += [llList2Integer(lx, idx), llList2Integer(ly, idx), 1];
                }
            }
            // Advance to the start of grammar y+1
            idxy += tmp2;
        }
        // Advance to the start of grammar x+1
        idxx += tmp1;
    }
    // Merge every pair of groups whose difference count reached 5
    for (idx = 0; idx < (llGetListLength(diffs) / 3); idx++)
    {
        idxz = llList2Integer(diffs, (3*idx)+2);
        if (idxz >= 5)
        {
            idxx = llList2Integer(diffs, 3*idx);
            idxy = llList2Integer(diffs, (3*idx)+1);
            // Correct the entries - groups are deleted at each iteration!
            for (x = 0; x < (llGetListLength(done)/2); x++)
            {
                tmp1 = llList2Integer(done, 2*x);
                tmp2 = llList2Integer(done, (2*x)+1);
                if (idxx > tmp1)
                    idxx -= 1;
                else if (idxx == tmp1)
                    idxx = tmp2;
                if (idxy > tmp1)
                    idxy -= 1;
                else if (idxy == tmp1)
                    idxy = tmp2;
            }
            _merge_groups(idxx, idxy);
            // _merge_groups keeps the lower-numbered group and deletes the other,
            // so record the pair as (deleted group, group it was merged into)
            if (idxx > idxy)
            {
                tmp1 = idxx;
                idxx = idxy;
                idxy = tmp1;
            }
            done += [idxy, idxx];
        }
    }
}

default
{
    state_entry()
    {
        // Sentences are analysed from channel 2;
        // LIST and MERGE commands are taken on channel 1
        llListen(2, "", "", "");
        llListen(1, "", "", "LIST");
        llListen(1, "", "", "MERGE");
    }

    listen(integer channel, string name, key id, string msg)
    {
        if (channel == 1)
        {
            if (msg == "LIST")
            {
                // Say the current groups and grammars
                mda_val = groups;
                mda_val_lens = group_lens;
                integer i;
                integer s = 0;
                integer e = 0;
                llSay(0, "Groups:");
                for (i = 0; i < mda_len(-1); i++)
                {
                    e += llList2Integer(mda_val_lens, i);
                    llSay(0, "(" + (string)i + ") " + llList2CSV(llList2List(mda_val, s, e-1)));
                    s = e;
                }
                mda_val = grammars;
                mda_val_lens = grammar_lens;
                s = 0;
                e = 0;
                llSay(0, "Grammars:");
                for (i = 0; i < mda_len(-1); i++)
                {
                    e += llList2Integer(mda_val_lens, i);
                    llSay(0, "(" + (string)i + ") " + llList2CSV(llList2List(mda_val, s, e-1)));
                    s = e;
                }
            }
            else if (msg == "MERGE")
                group_merge();
            return;
        }
        // Channel 2: analyse what was said
        list lx;
        list ly = split_sentences(llToLower(msg));
        list wo;
        integer x;
        integer y;
        for (y = 0; y < llGetListLength(ly); y++)
        {
            // Turn the sentence into a grammar (a list of group numbers) ...
            wo = [];
            lx = split_words(llList2String(ly, y));
            for (x = 0; x < llGetListLength(lx); x++)
                wo += [get_group(llList2String(lx, x))];
            // ... and store it if it is new
            mda_val_lens = grammar_lens;
            mda_val = grammars;
            set_add_list(wo);
            grammar_lens = mda_val_lens;
            grammars = mda_val;
        }
    }
}