import os
os.getcwd()
# os.listdir()
# os.chdir('..')
# os.listdir()

'/Users/young.nick/Documents/GitHub/Carol-B/Carol-B'

## Import the word dictionary from the PC
## Note important to specify 'str' data type when reading to avoid accidentally capturing 'false' as a boolean and converting to 'FALSE'

import pandas as pd

## wordsdf is list of valid guesses - length 6,591, narrowed down from a list of around 12,000 5 letter words found online. Most guesses are almost certainly not valid solutions
# wordsdf = pd.read_excel(r"1. IO files/Guess_list.xlsx",sheet_name='List',dtype={'Solution': str})

## solution_list is a list of ~2,000 likely valid solutions to wordle. It is not an official solution list (there is none) and does not contain all solutions (e.g., did not have 'laser', a recent solution)
solution_list = pd.read_excel(r"1. IO files/Solution_list.xlsx",sheet_name='Solution_list',dtype={'Solution': str})

## Build gsPairs (guess-solution Pairs) from scratch: cartesian product of guess list with itself. This 

# from itertools import product
# gsPairs = pd.DataFrame(product(wordsdf['Solution'],wordsdf['Solution']),columns=['Guess','Solution'])
# print(f'{len(gsPairs):,}')
# print(f'{len(wordsdf)*len(wordsdf):,}')

## Pattern function is wrong! Try trout = guess and tutor = solution

def pattern_func(guess,solution):
    pattern = [0,0,0,0,0]
    matches = [0,0,0,0,0]
    counts = [0,0,0,0,0]
    for x in range(0,5):
        if guess[x] == solution[x]:
            pattern[x] = 2
        else:
            for y in range(0,5):
                if guess[x] == solution[y] and guess[y] != solution[y]:
                    matches[x]+=1
            if matches[x]>0:
                for y in range(0,x):
                    if guess[x] == guess[y]:
                        counts[x]+=1
                if matches[x] > counts[x]:
                    pattern[x] = 1
    return ''.join(map(str,pattern))

def pattern(row):
    return pattern_func(row['Guess'],row['Solution'])

pattern_func('dolls','hotel')

'02100'

# gsPairs['Pattern'] = gsPairs.apply(pattern, axis=1)
# gsPairs.to_csv(r"1. IO files/gsPairs.csv")

## To read:

gsPairs = pd.read_csv(r"1. IO files/gsPairs.csv",dtype=str)
gsPairs = gsPairs[['Guess','Solution','Pattern']]

## Version with narrow solution set (which excludes 'laser') - same as just inner joining gsPairs with solution_list

# gsPairsSolutions = pd.read_csv(r"1. IO files\gsPairsSolutions.csv",dtype=str)

import math
print(math.sqrt(len(gsPairs)))
# gsPairs.loc[(gsPairs['Solution']=='FALSE') | (gsPairs['Guess']=='FALSE'),:].head()
gsPairs.head()

6591.0

def path_solver(person,inputdf,guesses_df,solutions_df,path_length):

        #get path of patterns for person
        pattern_path = inputdf.loc[inputdf['Person']==person,['Guess','Pattern']].reset_index()
    
        #get the guess number of first correct guess (last guess)
        last_guess = pattern_path.Guess.iloc[pattern_path.Pattern.eq('22222').idxmax()]
        print('Last guess = '+str(last_guess))

        guesses_df = guesses_df.merge(pattern_path[['Pattern']].drop_duplicates(),how='inner',on='Pattern')
        solutions_df = solutions_df.merge(pattern_path[['Pattern']].drop_duplicates(),how='inner',on='Pattern')

        if last_guess<=1:
            return solutions_df
            
        #Start with last imperfect guess and work backward
        guess_num = last_guess-1
        pattern = pattern_path.loc[pattern_path['Guess']==guess_num,'Pattern'].item()
        print(pattern)
        # .merge(solution_set,how='inner',on=['Solution'])
        paths = solutions_df.loc[solutions_df['Pattern']==pattern,:].rename(columns={'Guess': 'Guess_'+str(guess_num),'Pattern': 'Pattern_'+str(guess_num),'Solution': 'Guess_'+str(guess_num+1)})
        
        # paths = paths.rename(columns={'Guess_'+str(last_guess):'Solution'})
        
        if path_length == 1:
            paths = paths.rename(columns={'Guess_'+str(last_guess):'Solution'})
            return paths
            # quit if you just want most recent guess
        
        #otherwise keep going back to earlier guesses
        else:
            for i in range(1,min(path_length,last_guess-1)):
                guess_num = last_guess-1-i #3 in my example
                pattern = pattern_path.loc[pattern_path['Guess']==guess_num,'Pattern'].item()
                print(pattern)
                paths = paths.merge(
                    guesses_df.loc[guesses_df['Pattern']==pattern,:]
                    ,how='inner',left_on=['Guess_'+str(guess_num+1)]
                    ,right_on=['Solution']
                    ,suffixes=('','_'+str(guess_num))
                    )
                paths = paths.drop('Solution',axis=1).rename(columns={'Guess':'Guess_'+str(guess_num),'Pattern':'Pattern_'+str(guess_num)})
                #check if the pattern is compatible with future guesses as well
                if len(paths)>0:
                    for j in range(0,i+1): #last guess - j > last guess - i = guess_num, means j<=i-1
                        paths = paths.merge(guesses_df.loc[guesses_df['Pattern']==pattern,:]
                                            ,how='inner'
                                            ,left_on=['Guess_'+str(guess_num),'Guess_'+str(last_guess-j)]
                                            ,right_on=['Guess','Solution']
                                           ).drop(['Guess','Solution'],axis=1).rename(columns={'Pattern':'Pattern_'+str(guess_num)+str(last_guess-j)})                                            
                        j+=1
                i+=1
            paths = paths.rename(columns={'Guess_'+str(last_guess):'Solution'})
            return paths

## inputs excel stores all players and their patterns by date; we extract today's

inputs = pd.read_excel(r"1. IO files\Inputs.xlsx",sheet_name='Inputs',dtype={'Person': str, 'Guess': int, 'Pattern': str, 'Date':str})
inputs = inputs.loc[inputs['Date']==str(pd.to_datetime('today').normalize())].reset_index().drop('index',axis=1)

## we initiate the path_solver on gsPairs,gsPairs, but then we narrow solutions down and use the dataframe nextStart which has the same guess set as gsPairs but a smaller solution column

test_paths = path_solver('Alex',inputs,gsPairs,nextStart,path_length=2)
# test_paths = path_solver('Serena',inputs,gsPairs,gsPairs,path_length=1)

Last guess = 4
01011
01110

# Check how many paths there are and how many valid solutions compared with starting solution set
print('Number of paths: '+f"{len(test_paths):,}")
print('Number of solutions: '+f"{len(test_paths['Solution'].unique()):,}")
print('Starting number of solutions: '+f"{len(gsPairs['Solution'].unique()):,}")

Number of paths: 18,944
Number of solutions: 210
Starting number of solutions: 6,591

## Get the updated short list of solutions and create the starting point for next iteration, 'nextStart'

# solution_short = test_paths[['Solution']].drop_duplicates()
# solution_short = nextStart.loc[(nextStart['Guess']=='enact')&(nextStart['Pattern']=='21000')]['Solution']
# nextStart = gsPairs.merge(solution_short,how='inner',on='Solution')

## in case we need to filter on the assumed wordle solution list (which from our 'laser' debacle we know is too narrow)

# nextStart = nextStart.merge(solution_list,how='inner',on='Solution')

## check the lengths

print(f"{len(nextStart['Solution'].unique()):,}")
print(f"{len(solution_short):,}")
# solution_short

65
238

# writer = pd.ExcelWriter(r"1. IO files\Serena_paths.xlsx",engine='xlsxwriter')
# solution_short.to_excel(writer, sheet_name='Solutions', index=False)
# test_paths.to_excel(writer, sheet_name='Paths', index=False)
# writer.close()

solution_short = pd.read_excel(r"1. IO files\Outputs.xlsx",sheet_name='Solutions',dtype={'Solution': str})

# solution_short = nextStart.merge(solution_list.rename(columns={'Solution':'Guess'}),how='inner',on='Guess')['Solution'].drop_duplicates()

# solution_short = test_paths.loc[(test_paths['Guess_2'].isin(solution_list['Solution']))&(test_paths['Guess_3'].isin(solution_list['Solution'])) & (test_paths['Guess_4'].isin(solution_list['Solution']))][['Solution']].drop_duplicates().reset_index().drop('index',axis=1)
# solution_short = test_paths.loc[test_paths['Guess_2'].isin(solution_list['Solution'])][['Solution']].drop_duplicates().reset_index().drop('index',axis=1)

# nextStart = nextStart.merge(solution_short,how='inner',on='Solution')
# len(solution_short)

19

## General solution optimization with gsPairs

# nextStart = gsPairs.merge(solution_list,how='inner',on='Solution')

guessEvaluator = nextStart.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))
guessEvaluator = guessEvaluator.fillna(0)
guessEvaluator = pd.DataFrame(guessEvaluator.to_records())

# guessEvaluator.head()

## Get the guesses by expected group size which is the expected size of group if solution chosen at random from solution list

guessEvaluator['Square group'] = guessEvaluator['Solution']**2/len(solution_list)
# for i in range(2,11):
#     guessEvaluator['Group < '+str(i)] = 100*guessEvaluator['Solution'].where(guessEvaluator['Solution'] < i, other=0) / len(solution_list)

# del guess_by_EGS

Guesses = guessEvaluator.groupby('Guess')['Square group'].sum()
Guesses = Guesses.reset_index().rename(columns={'Guess':'Guess','Square group':'Expected group'})
Guesses = Guesses.sort_values(by=['Expected group'],ascending=[True]).reset_index().drop('index',axis=1)
# Guesses = Guesses.merge(guessEvaluator.groupby('Guess')['Group < 10'].sum(),how='inner',on='Guess')
# Guesses = Guesses.sort_values(by=['Group < 10','Group < 3','Group < 2','Expected group'],ascending=[False,False,False,True]).reset_index().drop('index',axis=1)

guess = 'raise'
print(Guesses.loc[Guesses['Guess']==guess])
# guessEvaluator.loc[guessEvaluator['Guess']==guess][['Guess','Pattern','Solution']]

   Guess  Expected group  Group < 2  Group < 3  Group < 10
0  raise       61.000864   1.209503    2.24622   11.360691

def good_guess(gsp_df):
    if len(gsp_df['Solution'].unique())<3:
        return gsp_df['Solution'].iloc[0]
    else:
        gE = gsp_df.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))

        gE['Expected group'] = gE['Solution']**2/len(gsp_df['Solution'].unique())
        gE = gE.groupby('Guess')['Expected group'].sum().reset_index()
        gg = gE['Guess'].iloc[gE['Expected group'].idxmin()]

        return gg

# good_guess(nextStart)

guess = 'raise'
gsp = nextStart.loc[nextStart['Solution'].isin(nextStart.loc[(nextStart['Guess']==guess)&(nextStart['Pattern']=='11000')]['Solution'])]
good_guess(gsp)

Guess                crowd
Expected group    5.205128
Name: 1346, dtype: object

## attempt to apply the good_guess within the dataframe without iterating over a list

# mask = (nextStart['Guess']==guess)
# good_paths = nextStart.loc[mask]
# pattern_path = good_paths[['Pattern']].drop_duplicates()
# pattern_path['Guess_2'] = pattern_path.apply(lambda x: good_guess(nextStart.loc[nextStart['Solution'].isin(good_paths.loc[good_paths['Pattern']==x['Pattern']]['Solution'].unique())]),axis=1)
# pattern_path['Guess_2'] = pattern_path.apply(lambda x: len(good_paths.loc[good_paths['Pattern']==x['Pattern']]['Pattern']))
# pattern_path.head()

## 2m 5s - much faster than the 11m 6s I got with for loops. Still slow though.

# pattern_path.head()
# nextStart.head()
# good_paths = good_paths.merge(pattern_path).merge(nextStart.rename(columns={'Pattern':'Pattern_2','Guess':'Guess_2'}))
# pattern_path['Guess_3'] = pattern_path.apply(lambda x: good_guess(nextStart.loc[nextStart['Solution'].isin(good_paths.loc[good_paths['Path_2']==x['Path_2']]['Solution'].unique())]),axis=1)

# good_paths['Path_2'] = good_paths.apply(lambda x: x['Pattern'] + '|' + x['Guess_2'] + '|' + x['Pattern_2'],axis=1)
# pattern_path = good_paths[['Path_2']].drop_duplicates()

## guess 3 took 6m however, which seems on par or maybe longer than the list / for loop method.
## what is happening? Computation time per group getting smaller, but number of groups is growing. So must be groups driving increase.
## this makes some sense actually because the list of guesses we have to check against the group is so large.

# good_paths = good_paths.merge(pattern_path).merge(nextStart.rename(columns={'Pattern':'Pattern_3','Guess':'Guess_3'}))
# good_paths['Path_3'] = good_paths.apply(lambda x: x['Path_2'] + '|' + x['Guess_3'] + '|' + x['Pattern_3'] ,axis=1)
# good_paths.head()

# pattern_path = good_paths[['Path_3']].drop_duplicates()
# pattern_path['Guess_4'] = pattern_path.apply(lambda x: good_guess(nextStart.loc[nextStart['Solution'].isin(good_paths.loc[good_paths['Path_3']==x['Path_3']]['Solution'].unique())]),axis=1)

# good_paths = good_paths.merge(pattern_path).merge(nextStart.rename(columns={'Pattern':'Pattern_4','Guess':'Guess_4'}))
# good_paths.head()

## guess 4 clocks in even longer at 7m 35s. I believe this is longer than the list approach but would have to go back and check.
## guess 5 will really be the test because here a lot of the groups should be very fast because they're already discovered. The rest are small

# good_paths['Path_4'] = good_paths.apply(lambda x: x['Path_3'] + '|' + x['Guess_4'] + '|' + x['Pattern_4'] ,axis=1)
# pattern_path = good_paths[['Path_4']].drop_duplicates()
# pattern_path['Guess_5'] = pattern_path.apply(lambda x: good_guess(nextStart.loc[nextStart['Solution'].isin(good_paths.loc[good_paths['Path_4']==x['Path_4']]['Solution'].unique())]),axis=1)

## Guess 5 7m 38s making me think it is almost certainly just application of function to already solved paths that is costly and which can be easily fixed
## Ultimate test will be the next step

# good_paths = good_paths.merge(pattern_path).merge(nextStart.rename(columns={'Pattern':'Pattern_5','Guess':'Guess_5'}))
# good_paths['Path_5'] = good_paths.apply(lambda x: x['Path_4'] + '|' + x['Guess_5'] + '|' + x['Pattern_5'] ,axis=1)
# good_paths.head()

# pattern_path = good_paths[['Path_5']].drop_duplicates()
# pattern_path['Guess_6'] = pattern_path.apply(lambda x: good_guess(nextStart.loc[nextStart['Solution'].isin(good_paths.loc[good_paths['Path_5']==x['Path_5']]['Solution'].unique())]),axis=1)

## Guess 6 7m 26s so this is definitely an artificial issue. Let's finish the computation though. Simple fix is just filter patterns off solved or when we pull the patterns do the pivot.

# good_paths = good_paths.merge(pattern_path).merge(nextStart.rename(columns={'Pattern':'Pattern_6','Guess':'Guess_6'}))
# good_paths['Path_6'] = good_paths.apply(lambda x: x['Path_5'] + '|' + x['Guess_6'] + '|' + x['Pattern_6'] ,axis=1)
# good_paths.head()

# len(good_paths.loc[good_paths['Pattern_5']!='22222'])

## Different results from the previous computation. This time it seems we always get it within 5 guesses. Let's do EV

# good_paths = good_paths.rename(columns={'Pattern':'Pattern_1'})
solve = '22222'

ev = len(good_paths.loc[good_paths['Pattern_1']==solve])
for i in range(1,6):
    ev+= (len(good_paths.loc[good_paths['Pattern_'+str(i+1)]==solve]) - len(good_paths.loc[good_paths['Pattern_'+str(i)]==solve]))*(i+1)

print(ev)
print(len(good_paths))
print(ev/len(good_paths))

8249
2315
3.563282937365011

guess = 'raise'
list = []

for pattern in guessEvaluator.loc[guessEvaluator['Guess']==guess]['Pattern'].unique():
    # print(pattern)
    gsp = nextStart.loc[nextStart['Solution'].isin(nextStart.loc[(nextStart['Guess']==guess)&(nextStart['Pattern']==pattern)]['Solution'])]
    gg = good_guess(gsp)
    list.append((pattern,gg))
    print(pattern)
    print(gg)

00000
00000
Guess               could
Expected group    6.27381
Name: 1264, dtype: object
00001
00001
Guess                lento
Expected group    4.983471
Name: 3189, dtype: object
00002
00002
Guess                could
Expected group    2.803279
Name: 1264, dtype: object
00010
00010
Guess             plonk
Expected group    4.325
Name: 4299, dtype: object
00011
00011
Guess                spelt
Expected group    2.219512
Name: 5429, dtype: object
00012
00012
Guess                knots
Expected group    1.705882
Name: 3045, dtype: object
00020
00020
Guess                slobs
Expected group    1.352941
Name: 5270, dtype: object
00021
00021
Guess             clogs
Expected group      1.0
Name: 1126, dtype: object
00022
00022
Guess             cloth
Expected group      1.9
Name: 1130, dtype: object
00100
00100
Guess                ponty
Expected group    5.429907
Name: 4342, dtype: object
00101
00101
Guess                lined
Expected group    1.742857
Name: 3238, dtype: object
00102
00102
Guess             lingo
Expected group     2.12
Name: 3243, dtype: object
00110
00110
Guess                shout
Expected group    1.380952
Name: 5135, dtype: object
00111
00111
Guess             abets
Expected group      1.0
Name: 11, dtype: object
00112
00112
Guess             agent
Expected group      1.0
Name: 76, dtype: object
00120
00120
Guess             cysts
Expected group      1.0
Name: 1406, dtype: object
00200
00200
Guess                clint
Expected group    2.803922
Name: 1120, dtype: object
00201
00201
Guess             dench
Expected group      1.4
Name: 1498, dtype: object
00202
00202
Guess                cloth
Expected group    1.695652
Name: 1130, dtype: object
00210
00210
Guess                plant
Expected group    2.241379
Name: 4276, dtype: object
00211
00211
Guess             ached
Expected group      1.0
Name: 28, dtype: object
00212
00212
Guess             plant
Expected group      1.8
Name: 4276, dtype: object
00220
00220
Guess                forth
Expected group    1.222222
Name: 2163, dtype: object
00221
00221
exist
00222
00222
Guess             acing
Expected group      1.0
Name: 31, dtype: object
01000
01000
Guess                clout
Expected group    3.934783
Name: 1133, dtype: object
01001
01001
Guess                cleat
Expected group    3.347826
Name: 1106, dtype: object
01002
01002
Guess                black
Expected group    3.829268
Name: 558, dtype: object
01010
01010
Guess                chalk
Expected group    2.674419
Name: 987, dtype: object
01011
01011
Guess                knelt
Expected group    1.333333
Name: 3038, dtype: object
01012
01012
Guess             klutz
Expected group      2.9
Name: 3030, dtype: object
01020
01020
Guess                shalt
Expected group    2.272727
Name: 5074, dtype: object
01021
01021
Guess             flyte
Expected group      1.0
Name: 2120, dtype: object
01022
01022
Guess             butch
Expected group      1.0
Name: 839, dtype: object
01100
01100
Guess                until
Expected group    1.941176
Name: 6149, dtype: object
01101
01101
Guess             abaca
Expected group      1.0
Name: 2, dtype: object
01102
01102
image
01110
01110
Guess             anata
Expected group      1.0
Name: 184, dtype: object
01111
01111
sepia
01112
01112
aisle
01120
01120
quasi
01200
01200
Guess                agony
Expected group    1.166667
Name: 85, dtype: object
01201
01201
alien
01202
01202
Guess             alkyd
Expected group      1.0
Name: 127, dtype: object
01212
01212
aside
01220
01220
amiss
02000
02000
Guess                culty
Expected group    5.769231
Name: 1374, dtype: object
02001
02001
Guess             notch
Expected group      2.1
Name: 3905, dtype: object
02002
02002
Guess             gulch
Expected group      2.0
Name: 2518, dtype: object
02010
02010
Guess             tolan
Expected group      1.7
Name: 5933, dtype: object
02011
02011
easel
02012
02012
Guess                butch
Expected group    1.666667
Name: 839, dtype: object
02020
02020
Guess             lasts
Expected group      1.0
Name: 3127, dtype: object
02022
02022
Guess             ample
Expected group      1.0
Name: 179, dtype: object
02100
02100
Guess                clint
Expected group    1.571429
Name: 1120, dtype: object
02110
02110
Guess             blocs
Expected group      1.0
Name: 588, dtype: object
02200
02200
Guess                adapt
Expected group    1.333333
Name: 44, dtype: object
02202
02202
Guess             admin
Expected group      1.0
Name: 53, dtype: object
02210
02210
saint
02220
02220
daisy
10000
10000
Guess                count
Expected group    4.941748
Name: 1265, dtype: object
10001
10001
Guess                outed
Expected group    7.764706
Name: 4016, dtype: object
10002
10002
Guess             prong
Expected group     3.05
Name: 4425, dtype: object
10010
10010
Guess                count
Expected group    2.083333
Name: 1265, dtype: object
10011
10011
Guess                sheep
Expected group    1.888889
Name: 5096, dtype: object
10012
10012
Guess             perch
Expected group      1.6
Name: 4179, dtype: object
10020
10020
Guess                count
Expected group    1.153846
Name: 1265, dtype: object
10021
10021
Guess             chops
Expected group      1.0
Name: 1056, dtype: object
10022
10022
Guess             count
Expected group     1.25
Name: 1265, dtype: object
10100
10100
Guess                bunty
Expected group    1.608696
Name: 812, dtype: object
10101
10101
Guess                fined
Expected group    2.923077
Name: 2034, dtype: object
10102
10102
Guess             aargh
Expected group      1.0
Name: 0, dtype: object
10110
10110
Guess             about
Expected group      1.0
Name: 20, dtype: object
10111
10111
Guess             admin
Expected group      1.0
Name: 53, dtype: object
10120
10120
first
10200
10200
Guess                plunk
Expected group    3.142857
Name: 4309, dtype: object
10201
10201
Guess             decaf
Expected group      1.5
Name: 1464, dtype: object
10202
10202
Guess                bumps
Expected group    2.647059
Name: 797, dtype: object
10210
10210
Guess             altho
Expected group      1.0
Name: 150, dtype: object
10211
10211
skier
10212
10212
shire
10220
10220
Guess             amuck
Expected group      1.4
Name: 182, dtype: object
11000
11000
Guess                crowd
Expected group    5.205128
Name: 1346, dtype: object
11001
11001
Guess                bleat
Expected group    2.882353
Name: 571, dtype: object
11002
11002
Guess                track
Expected group    3.692308
Name: 5987, dtype: object
11010
11010
Guess                chapt
Expected group    2.047619
Name: 994, dtype: object
11011
11011
Guess             champ
Expected group      1.0
Name: 988, dtype: object
11012
11012
Guess             chant
Expected group      1.0
Name: 991, dtype: object
11020
11020
Guess             bahts
Expected group     1.25
Name: 360, dtype: object
11022
11022
erase
11100
11100
Guess                gland
Expected group    1.666667
Name: 2366, dtype: object
11101
11101
aider
11102
11102
irate
11110
11110
stair
11200
11200
Guess             abled
Expected group      1.0
Name: 15, dtype: object
11202
11202
afire
11222
11222
arise
12000
12000
Guess                chomp
Expected group    2.692308
Name: 1053, dtype: object
12001
12001
Guess                empty
Expected group    3.642857
Name: 1823, dtype: object
12002
12002
Guess             abaca
Expected group      1.0
Name: 2, dtype: object
12010
12010
savor
12011
12011
safer
12020
12020
marsh
12022
12022
parse
12100
12100
nadir
12200
12200
Guess             ached
Expected group      1.0
Name: 28, dtype: object
20000
20000
Guess             muted
Expected group      1.0
Name: 3770, dtype: object
20001
20001
Guess             could
Expected group      2.1
Name: 1264, dtype: object
20002
20002
Guess             about
Expected group      1.0
Name: 20, dtype: object
20010
20010
rusty
20011
20011
reset
20020
20020
roost
20022
20022
reuse
20100
20100
Guess             abbot
Expected group      1.0
Name: 9, dtype: object
20101
20101
Guess             apted
Expected group      1.5
Name: 233, dtype: object
20102
20102
ridge
20110
20110
risky
20111
20111
Guess             acker
Expected group      1.0
Name: 32, dtype: object
20122
20122
rinse
20200
20200
rhino
20201
20201
reign
21000
21000
Guess             abaya
Expected group      1.0
Name: 6, dtype: object
21001
21001
Guess                mylar
Expected group    1.461538
Name: 3776, dtype: object
21020
21020
roast
21100
21100
rival
22000
22000
Guess             lohan
Expected group      1.0
Name: 3297, dtype: object
22001
22001
Guess             carom
Expected group      1.0
Name: 928, dtype: object
22002
22002
range
22010
22010
raspy
22100
22100
Guess             abord
Expected group      1.0
Name: 18, dtype: object
22200
22200
rainy
22222
22222
raise

# list[80][1]
# new_list = []

# for i in range(len(list)):
#     if isinstance(list[i][1], str):
#         new_list.append([list[i][0],list[i][1]])
#     else:
#         new_list.append([list[i][0],list[i][1][0]])

# gg_df = pd.DataFrame(new_list,columns=['Pattern','Guess_2'])
# good_paths = nextStart.loc[nextStart['Guess']==guess].merge(gg_df,how='inner',on='Pattern')

# good_paths = good_paths.merge(gsPairs.rename(columns={'Guess':'Guess_2','Pattern':'Pattern_2'}),how='inner',on=['Guess_2','Solution'])

# good_paths.head()

# good_paths.loc[good_paths['Pattern_2']=='22222']

# new_list

[['00000', 'could'],
 ['00001', 'lento'],
 ['00002', 'could'],
 ['00010', 'plonk'],
 ['00011', 'spelt'],
 ['00012', 'knots'],
 ['00020', 'slobs'],
 ['00021', 'clogs'],
 ['00022', 'cloth'],
 ['00100', 'ponty'],
 ['00101', 'lined'],
 ['00102', 'lingo'],
 ['00110', 'shout'],
 ['00111', 'abets'],
 ['00112', 'agent'],
 ['00120', 'cysts'],
 ['00200', 'clint'],
 ['00201', 'dench'],
 ['00202', 'cloth'],
 ['00210', 'plant'],
 ['00211', 'ached'],
 ['00212', 'plant'],
 ['00220', 'forth'],
 ['00221', 'exist'],
 ['00222', 'acing'],
 ['01000', 'clout'],
 ['01001', 'cleat'],
 ['01002', 'black'],
 ['01010', 'chalk'],
 ['01011', 'knelt'],
 ['01012', 'klutz'],
 ['01020', 'shalt'],
 ['01021', 'flyte'],
 ['01022', 'butch'],
 ['01100', 'until'],
 ['01101', 'abaca'],
 ['01102', 'image'],
 ['01110', 'anata'],
 ['01111', 'sepia'],
 ['01112', 'aisle'],
 ['01120', 'quasi'],
 ['01200', 'agony'],
 ['01201', 'alien'],
 ['01202', 'alkyd'],
 ['01212', 'aside'],
 ['01220', 'amiss'],
 ['02000', 'culty'],
 ['02001', 'notch'],
 ['02002', 'gulch'],
 ['02010', 'tolan'],
 ['02011', 'easel'],
 ['02012', 'butch'],
 ['02020', 'lasts'],
 ['02022', 'ample'],
 ['02100', 'clint'],
 ['02110', 'blocs'],
 ['02200', 'adapt'],
 ['02202', 'admin'],
 ['02210', 'saint'],
 ['02220', 'daisy'],
 ['10000', 'count'],
 ['10001', 'outed'],
 ['10002', 'prong'],
 ['10010', 'count'],
 ['10011', 'sheep'],
 ['10012', 'perch'],
 ['10020', 'count'],
 ['10021', 'chops'],
 ['10022', 'count'],
 ['10100', 'bunty'],
 ['10101', 'fined'],
 ['10102', 'aargh'],
 ['10110', 'about'],
 ['10111', 'admin'],
 ['10120', 'first'],
 ['10200', 'plunk'],
 ['10201', 'decaf'],
 ['10202', 'bumps'],
 ['10210', 'altho'],
 ['10211', 'skier'],
 ['10212', 'shire'],
 ['10220', 'amuck'],
 ['11000', 'crowd'],
 ['11001', 'bleat'],
 ['11002', 'track'],
 ['11010', 'chapt'],
 ['11011', 'champ'],
 ['11012', 'chant'],
 ['11020', 'bahts'],
 ['11022', 'erase'],
 ['11100', 'gland'],
 ['11101', 'aider'],
 ['11102', 'irate'],
 ['11110', 'stair'],
 ['11200', 'abled'],
 ['11202', 'afire'],
 ['11222', 'arise'],
 ['12000', 'chomp'],
 ['12001', 'empty'],
 ['12002', 'abaca'],
 ['12010', 'savor'],
 ['12011', 'safer'],
 ['12020', 'marsh'],
 ['12022', 'parse'],
 ['12100', 'nadir'],
 ['12200', 'ached'],
 ['20000', 'muted'],
 ['20001', 'could'],
 ['20002', 'about'],
 ['20010', 'rusty'],
 ['20011', 'reset'],
 ['20020', 'roost'],
 ['20022', 'reuse'],
 ['20100', 'abbot'],
 ['20101', 'apted'],
 ['20102', 'ridge'],
 ['20110', 'risky'],
 ['20111', 'acker'],
 ['20122', 'rinse'],
 ['20200', 'rhino'],
 ['20201', 'reign'],
 ['21000', 'abaya'],
 ['21001', 'mylar'],
 ['21020', 'roast'],
 ['21100', 'rival'],
 ['22000', 'lohan'],
 ['22001', 'carom'],
 ['22002', 'range'],
 ['22010', 'raspy'],
 ['22100', 'abord'],
 ['22200', 'rainy'],
 ['22222', 'raise']]

# new_list_two = []

# for guesspattern in new_list:
#     # print(guesspattern)
#     mask = (good_paths['Pattern']==guesspattern[0])&(good_paths['Guess_2']==guesspattern[1])
#     for pattern in good_paths.loc[mask]['Pattern_2'].unique():
#         mask_1 = (good_paths['Pattern']==guesspattern[0])&(good_paths['Guess_2']==guesspattern[1])&(good_paths['Pattern_2']==pattern)
#         mask_2 = nextStart['Solution'].isin(good_paths.loc[mask_1]['Solution'].unique())
#         gsp = nextStart.loc[mask_2]
#         # print(pattern + ' ' + good_guess(gsp))
#         item = guesspattern + [pattern,good_guess(gsp)]
#         new_list_two.append(item)

new_list_two

# gg_df = pd.DataFrame(new_list_two,columns=['Pattern','Guess_2','Pattern_2','Guess_3'])
# good_paths = good_paths.merge(gg_df,how='inner',on=['Pattern','Guess_2','Pattern_2'])
# good_paths = good_paths.merge(gsPairs.rename(columns={'Guess':'Guess_3','Pattern':'Pattern_3'}))
# good_paths.head(50)
# len(good_paths.loc[good_paths['Pattern_3']=='22222'])

[['00000', 'could', '02222', 'would'],
 ['00000', 'could', '22222', 'could'],
 ['00000', 'could', '02202', 'blimp'],
 ['00000', 'could', '01000', 'actin'],
 ['00000', 'could', '02200', 'amity'],
 ['00000', 'could', '02000', 'boozy'],
 ['00000', 'could', '01012', 'abbot'],
 ['00000', 'could', '11010', 'block'],
 ['00000', 'could', '00120', 'badge'],
 ['00000', 'could', '22200', 'aargh'],
 ['00000', 'could', '12200', 'adapt'],
 ['00000', 'could', '00100', 'nymph'],
 ['00000', 'could', '21010', 'actin'],
 ['00000', 'could', '00101', 'blimp'],
 ['00000', 'could', '10110', 'agony'],
 ['00000', 'could', '02201', 'doubt'],
 ['00000', 'could', '10101', 'dutch'],
 ['00000', 'could', '21112', 'cloud'],
 ['00000', 'could', '01010', 'album'],
 ['00000', 'could', '10100', 'habit'],
 ['00000', 'could', '20200', 'aback'],
 ['00000', 'could', '01100', 'amigo'],
 ['00000', 'could', '00200', 'thumb'],
 ['00000', 'could', '02010', 'abort'],
 ['00000', 'could', '02020', 'whang'],
 ['00000', 'could', '22001', 'condo'],
 ['00000', 'could', '22010', 'colon'],
 ['00000', 'could', '11000', 'knock'],
 ['00000', 'could', '10010', 'lynch'],
 ['00000', 'could', '02001', 'dowdy'],
 ['00000', 'could', '00210', 'flank'],
 ['00000', 'could', '12000', 'abate'],
 ['00000', 'could', '02021', 'dolly'],
 ['00000', 'could', '00010', 'lymph'],
 ['00000', 'could', '01021', 'oddly'],
 ['00000', 'could', '22000', 'comfy'],
 ['00000', 'could', '00110', 'abled'],
 ['00000', 'could', '01020', 'knoll'],
 ['00000', 'could', '10210', 'pluck'],
 ['00000', 'could', '02101', 'donut'],
 ['00000', 'could', '02110', 'mogul'],
 ['00000', 'could', '00000', 'nymph'],
 ['00000', 'could', '21110', 'clout'],
 ['00000', 'could', '21000', 'chock'],
 ['00000', 'could', '20210', 'admin'],
 ['00000', 'could', '01110', 'ghoul'],
 ['00000', 'could', '02011', 'moldy'],
 ['00000', 'could', '01101', 'outdo'],
 ['00000', 'could', '02220', 'moult'],
 ['00000', 'could', '00121', 'dully'],
 ['00000', 'could', '22020', 'coyly'],
 ['00001', 'lento', '11011', 'botch'],
 ['00001', 'lento', '01000', 'abaca'],
 ['00001', 'lento', '01101', 'caved'],
 ['00001', 'lento', '22000', 'aargh'],
 ['00001', 'lento', '01201', 'abhor'],
 ['00001', 'lento', '11001', 'devel'],
 ['00001', 'lento', '12001', 'below'],
 ['00001', 'lento', '01110', 'event'],
 ['00001', 'lento', '01111', 'often'],
 ['00001', 'lento', '12002', 'hello'],
 ['00001', 'lento', '01100', 'buffy'],
 ['00001', 'lento', '02020', 'aleph'],
 ['00001', 'lento', '01020', 'empty'],
 ['00001', 'lento', '11000', 'caped'],
 ['00001', 'lento', '11101', 'novel'],
 ['00001', 'lento', '12100', 'newly'],
 ['00001', 'lento', '01001', 'atopy'],
 ['00001', 'lento', '01010', 'ached'],
 ['00001', 'lento', '11010', 'aleck'],
 ['00001', 'lento', '02011', 'depot'],
 ['00001', 'lento', '11100', 'blend'],
 ['00001', 'lento', '02000', 'alkyd'],
 ['00001', 'lento', '22101', 'lemon'],
 ['00001', 'lento', '02200', 'abaca'],
 ['00001', 'lento', '02100', 'begun'],
 ['00001', 'lento', '12000', 'abbey'],
 ['00001', 'lento', '02010', 'abbey'],
 ['00001', 'lento', '02101', 'demon'],
 ['00001', 'lento', '02012', 'tempo'],
 ['00001', 'lento', '02220', 'tenth'],
 ['00001', 'lento', '01011', 'bombe'],
 ['00001', 'lento', '02201', 'venom'],
 ['00001', 'lento', '12101', 'melon'],
 ['00001', 'lento', '02002', 'gecko'],
 ['00001', 'lento', '22020', 'lefty'],
 ['00001', 'lento', '02210', 'tenet'],
 ['00001', 'lento', '21100', 'lumen'],
 ['00001', 'lento', '02001', 'decoy'],
 ['00001', 'lento', '11110', 'knelt'],
 ['00001', 'lento', '12010', 'betel'],
 ['00002', 'could', '01000', 'ancho'],
 ['00002', 'could', '01100', 'quote'],
 ['00002', 'could', '01020', 'whole'],
 ['00002', 'could', '00000', 'adept'],
 ['00002', 'could', '00101', 'bajan'],
 ['00002', 'could', '20020', 'cycle'],
 ['00002', 'could', '02011', 'lodge'],
 ['00002', 'could', '10000', 'champ'],
 ['00002', 'could', '02001', 'dodge'],
 ['00002', 'could', '01010', 'badge'],
 ['00002', 'could', '00100', 'abele'],
 ['00002', 'could', '02020', 'noble'],
 ['00002', 'could', '21010', 'clone'],
 ['00002', 'could', '10120', 'uncle'],
 ['00002', 'could', '00020', 'belle'],
 ['00002', 'could', '22200', 'coupe'],
 ['00002', 'could', '00210', 'empty'],
 ['00002', 'could', '00001', 'hedge'],
 ['00002', 'could', '11100', 'ounce'],
 ['00002', 'could', '02100', 'vogue'],
 ['00002', 'could', '21000', 'choke'],
 ['00002', 'could', '02000', 'booze'],
 ['00002', 'could', '00011', 'ledge'],
 ['00002', 'could', '00010', 'melee'],
 ['00002', 'could', '10201', 'deuce'],
 ['00002', 'could', '00110', 'bulge'],
 ['00002', 'could', '20200', 'chute'],
 ['00002', 'could', '00120', 'bugle'],
 ['00002', 'could', '00201', 'etude'],
 ['00002', 'could', '02200', 'gouge'],
 ['00002', 'could', '00211', 'elude'],
 ['00002', 'could', '02220', 'boule'],
 ['00002', 'could', '10101', 'dunce'],
 ['00010', 'plonk', '00100', 'abuts'],
 ['00010', 'plonk', '00202', 'atmos'],
 ['00010', 'plonk', '00000', 'humid'],
 ['00010', 'plonk', '00120', 'sound'],
 ['00010', 'plonk', '00210', 'acton'],
 ['00010', 'plonk', '00110', 'bonus'],
 ['00010', 'plonk', '00200', 'chute'],
 ['00010', 'plonk', '00020', 'aight'],
 ['00010', 'plonk', '00002', 'stuck'],
 ['00010', 'plonk', '10200', 'afoot'],
 ['00010', 'plonk', '01001', 'skull'],
 ['00010', 'plonk', '10210', 'spoon'],
 ['00010', 'plonk', '01200', 'ached'],
 ['00010', 'plonk', '01100', 'locus'],
 ['00010', 'plonk', '00220', 'stony'],
 ['00010', 'plonk', '00201', 'smoky'],
 ['00010', 'plonk', '00001', 'ached'],
 ['00010', 'plonk', '11000', 'lupus'],
 ['00010', 'plonk', '11200', 'spool'],
 ['00010', 'plonk', '10000', 'stump'],
 ['00010', 'plonk', '00022', 'skunk'],
 ['00010', 'plonk', '12000', 'slump'],
 ['00010', 'plonk', '00010', 'snuff'],
 ['00010', 'plonk', '01000', 'about'],
 ['00010', 'plonk', '02200', 'sloth'],
 ['00010', 'plonk', '10022', 'spunk'],
 ['00010', 'plonk', '10202', 'spook'],
 ['00010', 'plonk', '12200', 'sloop'],
 ['00010', 'plonk', '00012', 'snuck'],
 ['00010', 'plonk', '20000', 'pushy'],
 ['00010', 'plonk', '02020', 'slung'],
 ['00010', 'plonk', '02000', 'slyly'],
 ['00010', 'plonk', '02022', 'slunk'],
 ['00010', 'plonk', '01002', 'skulk'],
 ['00011', 'spelt', '22200', 'abled'],
 ['00011', 'spelt', '20211', 'steel'],
 ['00011', 'spelt', '20202', 'ached'],
 ['00011', 'spelt', '20100', 'seven'],
 ['00011', 'spelt', '22202', 'spent'],
 ['00011', 'spelt', '21210', 'sleep'],
 ['00011', 'spelt', '21101', 'setup'],
 ['00011', 'spelt', '20220', 'shawl'],
 ['00011', 'spelt', '21200', 'sheep'],
 ['00011', 'spelt', '22220', 'spell'],
 ['00011', 'spelt', '12102', 'upset'],
 ['00011', 'spelt', '10102', 'abbot'],
 ['00011', 'spelt', '21201', 'steep'],
 ['00011', 'spelt', '20210', 'sleek'],
 ['00011', 'spelt', '21202', 'swept'],
 ['00011', 'spelt', '21212', 'slept'],
 ['00011', 'spelt', '10101', 'attap'],
 ['00011', 'spelt', '20200', 'sheen'],
 ['00011', 'spelt', '20201', 'steed'],
 ['00011', 'spelt', '11100', 'pesky'],
 ['00011', 'spelt', '11101', 'pesto'],
 ['00011', 'spelt', '22222', 'spelt'],
 ['00011', 'spelt', '20212', 'sleet'],
 ['00011', 'spelt', '20222', 'smelt'],
 ['00011', 'spelt', '10100', 'nosey'],
 ['00012', 'knots', '00011', 'style'],
 ['00012', 'knots', '01211', 'stone'],
 ['00012', 'knots', '01001', 'scene'],
 ['00012', 'knots', '00201', 'ached'],
 ['00012', 'knots', '10201', 'smoke'],
 ['00012', 'knots', '00101', 'solve'],
 ['00012', 'knots', '00211', 'stove'],
 ['00012', 'knots', '10211', 'stoke'],
 ['00012', 'knots', '01201', 'shone'],
 ['00012', 'knots', '02001', 'ensue'],
 ['00012', 'knots', '00221', 'smote'],
 ['00012', 'knots', '00001', 'segue'],
 ['00020', 'slobs', '10200', 'ghost'],
 ['00020', 'slobs', '10210', 'boost'],
 ['00020', 'slobs', '12000', 'flush'],
 ['00020', 'slobs', '12202', 'gloss'],
 ['00020', 'slobs', '10000', 'gypsy'],
 ['00020', 'slobs', '12010', 'blush'],
 ['00020', 'slobs', '11100', 'lousy'],
 ['00020', 'slobs', '10101', 'mossy'],
 ['00020', 'slobs', '10001', 'fussy'],
 ['00020', 'slobs', '22000', 'slush'],
 ['00020', 'slobs', '10111', 'bossy'],
 ['00020', 'slobs', '10100', 'joust'],
 ['00020', 'slobs', '20000', 'shush'],
 ['00020', 'slobs', '22200', 'slosh'],
 ['00021', 'clogs', '00011', 'guest'],
 ['00021', 'clogs', '00012', 'guess'],
 ['00021', 'clogs', '00001', 'quest'],
 ['00021', 'clogs', '20001', 'chest'],
 ['00021', 'clogs', '20002', 'chess'],
 ['00021', 'clogs', '01001', 'welsh'],
 ['00021', 'clogs', '02001', 'flesh'],
 ['00021', 'clogs', '02002', 'bless'],
 ['00021', 'clogs', '00101', 'poesy'],
 ['00022', 'cloth', '00011', 'these'],
 ['00022', 'cloth', '00211', 'those'],
 ['00022', 'cloth', '00101', 'house'],
 ['00022', 'cloth', '22200', 'close'],
 ['00022', 'cloth', '00000', 'adage'],
 ['00022', 'cloth', '00201', 'whose'],
 ['00022', 'cloth', '00100', 'about'],
 ['00022', 'cloth', '01000', 'pulse'],
 ['00022', 'cloth', '01200', 'loose'],
 ['00022', 'cloth', '20201', 'chose'],
 ['00022', 'cloth', '00200', 'acing'],
 ['00022', 'cloth', '00010', 'tense'],
 ['00022', 'cloth', '01100', 'louse'],
 ['00022', 'cloth', '20100', 'copse'],
 ['00100', 'ponty', '12010', 'topic'],
 ['00100', 'ponty', '02100', 'login'],
 ['00100', 'ponty', '00110', 'adult'],
 ['00100', 'ponty', '00010', 'dwelt'],
 ['00100', 'ponty', '10110', 'input'],
 ['00100', 'ponty', '00000', 'cavil'],
 ['00100', 'ponty', '00020', 'abled'],
 ['00100', 'ponty', '02000', 'abled'],
 ['00100', 'ponty', '21010', 'pilot'],
 ['00100', 'ponty', '00201', 'vinyl'],
 ['00100', 'ponty', '00002', 'badly'],
 ['00100', 'ponty', '20010', 'pitch'],
 ['00100', 'ponty', '01200', 'abele'],
 ['00100', 'ponty', '02010', 'acted'],
 ['00100', 'ponty', '00022', 'batik'],
 ['00100', 'ponty', '01100', 'inbox'],
 ['00100', 'ponty', '11010', 'optic'],
 ['00100', 'ponty', '10002', 'admin'],
 ['00100', 'ponty', '00220', 'ninth'],
 ['00100', 'ponty', '20000', 'pupil'],
 ['00100', 'ponty', '01000', 'abled'],
 ['00100', 'ponty', '00202', 'aking'],
 ['00100', 'ponty', '10000', 'lipid'],
 ['00100', 'ponty', '10010', 'tulip'],
 ['00100', 'ponty', '20200', 'pinch'],
 ['00100', 'ponty', '00200', 'cafes'],
 ['00100', 'ponty', '02200', 'ionic'],
 ['00100', 'ponty', '02110', 'toxin'],
 ['00100', 'ponty', '02210', 'tonic'],
 ['00100', 'ponty', '01020', 'ditto'],
 ['00100', 'ponty', '00012', 'itchy'],
 ['00100', 'ponty', '11000', 'hippo'],
 ['00100', 'ponty', '21220', 'pinto'],
 ['00100', 'ponty', '20002', 'piggy'],
 ['00100', 'ponty', '10100', 'unzip'],
 ['00100', 'ponty', '00210', 'tunic'],
 ['00100', 'ponty', '20202', 'pinky'],
 ['00100', 'ponty', '00100', 'cumin'],
 ['00100', 'ponty', '01010', 'bigot'],
 ['00100', 'ponty', '01110', 'ingot'],
 ['00100', 'ponty', '00222', 'minty'],
 ['00100', 'ponty', '20012', 'pithy'],
 ['00100', 'ponty', '00001', 'idyll'],
 ['00101', 'lined', '02021', 'video'],
 ['00101', 'lined', '01121', 'index'],
 ['00101', 'lined', '12012', 'alway'],
 ['00101', 'lined', '02120', 'given'],
 ['00101', 'lined', '01110', 'begin'],
 ['00101', 'lined', '02010', 'eight'],
 ['00101', 'lined', '11011', 'devil'],
 ['00101', 'lined', '12020', 'pixel'],
 ['00101', 'lined', '22220', 'linen'],
 ['00101', 'lined', '01211', 'denim'],
 ['00101', 'lined', '01011', 'debit'],
 ['00101', 'lined', '01010', 'abaca'],
 ['00101', 'lined', '11120', 'inlet'],
 ['00101', 'lined', '11010', 'helix'],
 ['00101', 'lined', '02121', 'widen'],
 ['00101', 'lined', '22020', 'libel'],
 ['00101', 'lined', '02112', 'fiend'],
 ['00101', 'lined', '02220', 'piney'],
 ['00101', 'lined', '01012', 'tepid'],
 ['00101', 'lined', '11110', 'elfin'],
 ['00101', 'lined', '02020', 'bicep'],
 ['00101', 'lined', '01210', 'ennui'],
 ['00101', 'lined', '22120', 'liken'],
 ['00101', 'lined', '11020', 'impel'],
 ['00102', 'lingo', '12000', 'abele'],
 ['00102', 'lingo', '01001', 'movie'],
 ['00102', 'lingo', '02000', 'actin'],
 ['00102', 'lingo', '02100', 'niche'],
 ['00102', 'lingo', '01000', 'cutie'],
 ['00102', 'lingo', '02001', 'diode'],
 ['00102', 'lingo', '02220', 'hinge'],
 ['00102', 'lingo', '01210', 'genie'],
 ['00102', 'lingo', '02200', 'wince'],
 ['00102', 'lingo', '22020', 'liege'],
 ['00102', 'lingo', '12020', 'bilge'],
 ['00102', 'lingo', '02020', 'midge'],
 ['00102', 'lingo', '01100', 'untie'],
 ['00102', 'lingo', '22000', 'lithe'],
 ['00102', 'lingo', '11000', 'belie'],
 ['00110', 'shout', '10010', 'music'],
 ['00110', 'shout', '10002', 'visit'],
 ['00110', 'shout', '20100', 'solid'],
 ['00110', 'shout', '20002', 'split'],
 ['00110', 'shout', '21002', 'sight'],
 ['00110', 'shout', '21001', 'sixth'],
 ['00110', 'shout', '10100', 'disco'],
 ['00110', 'shout', '10020', 'minus'],
 ['00110', 'shout', '20000', 'silly'],
 ['00110', 'shout', '20001', 'sixty'],
 ['00110', 'shout', '21010', 'sushi'],
 ['00110', 'shout', '20200', 'spoil'],
 ['00110', 'shout', '11000', 'fishy'],
 ['00110', 'shout', '10102', 'posit'],
 ['00110', 'shout', '20201', 'stoic'],
 ['00110', 'shout', '10000', 'wispy'],
 ['00110', 'shout', '20010', 'squib'],
 ['00111', 'abets', '00211', 'stein'],
 ['00111', 'abets', '00201', 'sheik'],
 ['00111', 'abets', '00111', 'islet'],
 ['00111', 'abets', '00101', 'sinew'],
 ['00112', 'agent', '00110', 'since'],
 ['00112', 'agent', '00100', 'issue'],
 ['00112', 'agent', '01200', 'siege'],
 ['00112', 'agent', '00200', 'sieve'],
 ['00112', 'agent', '01110', 'singe'],
 ['00120', 'cysts', '00110', 'midst'],
 ['00120', 'cysts', '01200', 'missy'],
 ['00120', 'cysts', '00100', 'kiosk'],
 ['00120', 'cysts', '01201', 'sissy'],
 ['00120', 'cysts', '01100', 'gipsy'],
 ['00120', 'cysts', '01110', 'tipsy'],
 ['00200', 'clint', '10200', 'alkyd'],
 ['00200', 'clint', '22200', 'abaca'],
 ['00200', 'clint', '00221', 'aargh'],
 ['00200', 'clint', '00222', 'point'],
 ['00200', 'clint', '00220', 'bawdy'],
 ['00200', 'clint', '21200', 'cardi'],
 ['00200', 'clint', '01200', 'abide'],
 ['00200', 'clint', '00210', 'about'],
 ['00200', 'clint', '01202', 'badge'],
 ['00200', 'clint', '02220', 'aback'],
 ['00200', 'clint', '10201', 'thick'],
 ['00200', 'clint', '00211', 'unity'],
 ['00200', 'clint', '01220', 'lying'],
 ['00200', 'clint', '20200', 'chick'],
 ['00200', 'clint', '00202', 'idiot'],
 ['00200', 'clint', '02222', 'flint'],
 ['00200', 'clint', '02201', 'blitz'],
 ['00200', 'clint', '12200', 'flick'],
 ['00200', 'clint', '00201', 'thigh'],
 ['00200', 'clint', '00200', 'abbot'],
 ['00200', 'clint', '10220', 'icing'],
 ['00200', 'clint', '22220', 'cling'],
 ['00200', 'clint', '02200', 'blimp'],
 ['00200', 'clint', '11200', 'icily'],
 ['00201', 'dench', '02100', 'being'],
 ['00201', 'dench', '01011', 'chief'],
 ['00201', 'dench', '01000', 'quiet'],
 ['00201', 'dench', '02002', 'weigh'],
 ['00201', 'dench', '01001', 'thief'],
 ['00201', 'dench', '22000', 'deity'],
 ['00201', 'dench', '11020', 'edict'],
 ['00201', 'dench', '11000', 'plied'],
 ['00201', 'dench', '01020', 'evict'],
 ['00201', 'dench', '02102', 'neigh'],
 ['00201', 'dench', '22100', 'deign'],
 ['00201', 'dench', '01100', 'eking'],
 ['00202', 'cloth', '01001', 'while'],
 ['00202', 'cloth', '00000', 'guide'],
 ['00202', 'cloth', '00021', 'white'],
 ['00202', 'cloth', '10100', 'voice'],
 ['00202', 'cloth', '00020', 'quite'],
 ['00202', 'cloth', '10010', 'twice'],
 ['00202', 'cloth', '02020', 'elite'],
 ['00202', 'cloth', '10000', 'juice'],
 ['00202', 'cloth', '02100', 'olive'],
 ['00202', 'cloth', '00100', 'adapt'],
 ['00202', 'cloth', '01000', 'exile'],
 ['00202', 'cloth', '02000', 'glide'],
 ['00202', 'cloth', '20001', 'chime'],
 ['00202', 'cloth', '00001', 'whine'],
 ['00202', 'cloth', '00010', 'twine'],
 ['00202', 'cloth', '01010', 'utile'],
 ['00210', 'plant', '00020', 'auger'],
 ['00210', 'plant', '01001', 'still'],
 ['00210', 'plant', '00001', 'ached'],
 ['00210', 'plant', '00002', 'shift'],
 ['00210', 'plant', '01000', 'skill'],
 ['00210', 'plant', '11000', 'spill'],
 ['00210', 'plant', '10000', 'aking'],
 ['00210', 'plant', '00021', 'sting'],
 ['00210', 'plant', '02000', 'slick'],
 ['00210', 'plant', '00010', 'scion'],
 ['00210', 'plant', '02020', 'sling'],
 ['00210', 'plant', '00022', 'stint'],
 ['00210', 'plant', '10020', 'spiny'],
 ['00210', 'plant', '00000', 'skiff'],
 ['00210', 'plant', '11002', 'spilt'],
 ['00210', 'plant', '01002', 'stilt'],
 ['00211', 'ached', '00020', 'spiel'],
 ['00211', 'ached', '00022', 'spied'],
 ['00211', 'ached', '00122', 'shied'],
 ['00212', 'plant', '00001', 'suite'],
 ['00212', 'plant', '02000', 'ached'],
 ['00212', 'plant', '01000', 'smile'],
 ['00212', 'plant', '10000', 'spice'],
 ['00212', 'plant', '00020', 'shine'],
 ['00212', 'plant', '10020', 'spine'],
 ['00212', 'plant', '10001', 'spite'],
 ['00212', 'plant', '00000', 'seize'],
 ['00212', 'plant', '10010', 'snipe'],
 ['00212', 'plant', '00010', 'snide'],
 ['00220', 'forth', '00010', 'twist'],
 ['00220', 'forth', '02000', 'noisy'],
 ['00220', 'forth', '00000', 'bliss'],
 ['00220', 'forth', '02010', 'moist'],
 ['00220', 'forth', '00002', 'swish'],
 ['00220', 'forth', '02011', 'hoist'],
 ['00220', 'forth', '00001', 'whisk'],
 ['00220', 'forth', '22010', 'foist'],
 ['00221', 'exist', '22222', 'exist'],
 ['00221', 'exist', '10222', 'heist'],
 ['00222', 'acing', '00210', 'noise'],
 ['00222', 'acing', '00201', 'guise'],
 ['00222', 'acing', '00200', 'poise'],
 ['01000', 'clout', '00222', 'about'],
 ['01000', 'clout', '11100', 'falls'],
 ['01000', 'clout', '12000', 'black'],
 ['01000', 'clout', '00101', 'abaya'],
 ['01000', 'clout', '01101', 'actin'],
 ['01000', 'clout', '00010', 'human'],
 ['01000', 'clout', '01012', 'adult'],
 ['01000', 'clout', '02200', 'along'],
 ['01000', 'clout', '00200', 'among'],
 ['01000', 'clout', '02020', 'album'],
 ['01000', 'clout', '01000', 'abram'],
 ['01000', 'clout', '00100', 'admin'],
 ['01000', 'clout', '02100', 'allow'],
 ['01000', 'clout', '00001', 'thank'],
 ['01000', 'clout', '02002', 'plant'],
 ['01000', 'clout', '02000', 'panda'],
 ['01000', 'clout', '20100', 'abhor'],
 ['01000', 'clout', '00202', 'adopt'],
 ['01000', 'clout', '02202', 'beefs'],
 ['01000', 'clout', '01020', 'awful'],
 ['01000', 'clout', '00002', 'adapt'],
 ['01000', 'clout', '01100', 'dolly'],
 ['01000', 'clout', '02220', 'aloud'],
 ['01000', 'clout', '22000', 'aking'],
 ['01000', 'clout', '00211', 'quota'],
 ['01000', 'clout', '20000', 'champ'],
 ['01000', 'clout', '21000', 'chalk'],
 ['01000', 'clout', '22200', 'cloak'],
 ['01000', 'clout', '20002', 'chant'],
 ['01000', 'clout', '00102', 'abbot'],
 ['01000', 'clout', '10100', 'mocha'],
 ['01000', 'clout', '01201', 'atoll'],
 ['01000', 'clout', '10000', 'aargh'],
 ['01000', 'clout', '00011', 'junta'],
 ['01000', 'clout', '01001', 'aptly'],
 ['01000', 'clout', '01011', 'tubal'],
 ['01000', 'clout', '11101', 'octal'],
 ['01000', 'clout', '10010', 'quack'],
 ['01000', 'clout', '02102', 'allot'],
 ['01000', 'clout', '01220', 'afoul'],
 ['01000', 'clout', '01010', 'pupal'],
 ['01001', 'cleat', '01120', 'deeps'],
 ['01001', 'cleat', '10110', 'abaca'],
 ['01001', 'cleat', '20220', 'cheap'],
 ['01001', 'cleat', '00111', 'ached'],
 ['01001', 'cleat', '00212', 'agent'],
 ['01001', 'cleat', '01121', 'amped'],
 ['01001', 'cleat', '22220', 'clean'],
 ['01001', 'cleat', '00120', 'aback'],
 ['01001', 'cleat', '00110', 'banda'],
 ['01001', 'cleat', '10220', 'ocean'],
 ['01001', 'cleat', '00220', 'ahead'],
 ['01001', 'cleat', '01111', 'delta'],
 ['01001', 'cleat', '01110', 'faked'],
 ['01001', 'cleat', '10112', 'exact'],
 ['01001', 'cleat', '10111', 'teach'],
 ['01001', 'cleat', '00112', 'meant'],
 ['01001', 'cleat', '00210', 'aargh'],
 ['01001', 'cleat', '20222', 'cheat'],
 ['01001', 'cleat', '00222', 'wheat'],
 ['01001', 'cleat', '01112', 'bends'],
 ['01001', 'cleat', '00211', 'theta'],
 ['01001', 'cleat', '10120', 'decay'],
 ['01001', 'cleat', '02110', 'alley'],
 ['01001', 'cleat', '11120', 'decal'],
 ['01001', 'cleat', '00221', 'tweak'],
 ['01001', 'cleat', '11110', 'leach'],
 ['01001', 'cleat', '02220', 'admin'],
 ['01001', 'cleat', '22222', 'cleat'],
 ['01001', 'cleat', '02222', 'pleat'],
 ['01001', 'cleat', '00122', 'begat'],
 ['01001', 'cleat', '11122', 'eclat'],
 ['01002', 'black', '02220', 'place'],
 ['01002', 'black', '10100', 'abide'],
 ['01002', 'black', '01200', 'leave'],
 ['01002', 'black', '01100', 'admin'],
 ['01002', 'black', '00220', 'peace'],
 ['01002', 'black', '02100', 'alone'],
 ['01002', 'black', '02200', 'adapt'],
 ['01002', 'black', '22200', 'admin'],
 ['01002', 'black', '00110', 'acute'],
 ['01002', 'black', '00201', 'about'],
 ['01002', 'black', '01101', 'ankle'],
 ['01002', 'black', '00200', 'adapt'],
 ['01002', 'black', '02201', 'flake'],
 ['01002', 'black', '00100', 'anode'],
 ['01002', 'black', '00101', 'awoke'],
 ['01002', 'black', '10200', 'abate'],
 ['01002', 'black', '11100', 'amble'],
 ['01002', 'black', '00210', 'chafe'],
 ['01010', 'chalk', '00220', 'abram'],
 ['01010', 'chalk', '02220', 'shall'],
 ['01010', 'chalk', '00200', 'ament'],
 ['01010', 'chalk', '00110', 'usual'],
 ['01010', 'chalk', '10202', 'actin'],
 ['01010', 'chalk', '00100', 'aarti'],
 ['01010', 'chalk', '22200', 'chaos'],
 ['01010', 'chalk', '02200', 'shaft'],
 ['01010', 'chalk', '10100', 'abaca'],
 ['01010', 'chalk', '00202', 'spank'],
 ['01010', 'chalk', '12202', 'shack'],
 ['01010', 'chalk', '10212', 'slack'],
 ['01010', 'chalk', '00210', 'slang'],
 ['01010', 'chalk', '10220', 'adapt'],
 ['01010', 'chalk', '02202', 'shank'],
 ['01010', 'chalk', '02210', 'shawl'],
 ['01010', 'chalk', '00222', 'stalk'],
 ['01010', 'chalk', '10200', 'scamp'],
 ['01010', 'chalk', '02201', 'shaky'],
 ['01010', 'chalk', '01200', 'swath'],
 ['01010', 'chalk', '02110', 'shoal'],
 ['01010', 'chalk', '00201', 'snaky'],
 ['01011', 'knelt', '10200', 'speak'],
 ['01011', 'knelt', '00102', 'asset'],
 ['01011', 'knelt', '00100', 'essay'],
 ['01011', 'knelt', '00201', 'steam'],
 ['01011', 'knelt', '00211', 'steal'],
 ['01011', 'knelt', '01100', 'sedan'],
 ['01011', 'knelt', '00202', 'sweat'],
 ['01011', 'knelt', '10201', 'steak'],
 ['01011', 'knelt', '12200', 'sneak'],
 ['01011', 'knelt', '10100', 'askew'],
 ['01012', 'klutz', '00020', 'state'],
 ['01012', 'klutz', '00000', 'dumps'],
 ['01012', 'klutz', '00010', 'stage'],
 ['01012', 'klutz', '01000', 'scale'],
 ['01012', 'klutz', '00100', 'usage'],
 ['01012', 'klutz', '02000', 'slave'],
 ['01012', 'klutz', '10000', 'snake'],
 ['01012', 'klutz', '10010', 'stake'],
 ['01012', 'klutz', '10020', 'skate'],
 ['01012', 'klutz', '02020', 'slate'],
 ['01012', 'klutz', '01010', 'stale'],
 ['01020', 'shalt', '10210', 'aches'],
 ['01020', 'shalt', '11210', 'flash'],
 ['01020', 'shalt', '10202', 'abaca'],
 ['01020', 'shalt', '10212', 'blast'],
 ['01020', 'shalt', '21200', 'smash'],
 ['01020', 'shalt', '21210', 'slash'],
 ['01020', 'shalt', '21201', 'stash'],
 ['01020', 'shalt', '10102', 'angst'],
 ['01020', 'shalt', '10100', 'abyss'],
 ['01020', 'shalt', '12200', 'chasm'],
 ['01020', 'shalt', '20200', 'spasm'],
 ['01020', 'shalt', '11200', 'aargh'],
 ['01020', 'shalt', '10200', 'amass'],
 ['01021', 'flyte', '01011', 'least'],
 ['01021', 'flyte', '00011', 'beast'],
 ['01021', 'flyte', '00111', 'yeast'],
 ['01021', 'flyte', '20011', 'feast'],
 ['01021', 'flyte', '01001', 'leash'],
 ['01022', 'butch', '00001', 'phase'],
 ['01022', 'butch', '11000', 'abuse'],
 ['01022', 'butch', '00000', 'lease'],
 ['01022', 'butch', '00011', 'chase'],
 ['01022', 'butch', '00010', 'cease'],
 ['01022', 'butch', '00100', 'tease'],
 ['01022', 'butch', '01000', 'amuse'],
 ['01022', 'butch', '10000', 'abase'],
 ['01100', 'until', '01020', 'aargh'],
 ['01100', 'until', '10020', 'audio'],
 ['01100', 'until', '01012', 'final'],
 ['01100', 'until', '00021', 'claim'],
 ['01100', 'until', '00020', 'abhor'],
 ['01100', 'until', '01021', 'plain'],
 ['01100', 'until', '10120', 'audit'],
 ['01100', 'until', '01010', 'piano'],
 ['01100', 'until', '00011', 'above'],
 ['01100', 'until', '01110', 'giant'],
 ['01100', 'until', '00010', 'pizza'],
 ['01100', 'until', '00212', 'vital'],
 ['01100', 'until', '00120', 'abaca'],
 ['01100', 'until', '01210', 'titan'],
 ['01100', 'until', '00022', 'avail'],
 ['01100', 'until', '00220', 'attic'],
 ['01100', 'until', '00112', 'tidal'],
 ['01100', 'until', '10022', 'quail'],
 ['01100', 'until', '02011', 'inlay'],
 ['01100', 'until', '02022', 'anvil'],
 ['01100', 'until', '02220', 'antic'],
 ['01100', 'until', '00121', 'plait'],
 ['01101', 'abaca', '00200', 'email'],
 ['01101', 'abaca', '00002', 'media'],
 ['01101', 'abaca', '10000', 'ideal'],
 ['01102', 'image', '22222', 'image'],
 ['01102', 'image', '20202', 'inane'],
 ['01110', 'anata', '00022', 'vista'],
 ['01110', 'anata', '00002', 'sigma'],
 ['01110', 'anata', '01210', 'stain'],
 ['01110', 'anata', '02200', 'snail'],
 ['01110', 'anata', '01200', 'slain'],
 ['01110', 'anata', '00200', 'swami'],
 ['01110', 'anata', '00210', 'staid'],
 ['01111', 'sepia', '22222', 'sepia'],
 ['01112', 'aisle', '22222', 'aisle'],
 ['01120', 'quasi', '22222', 'quasi'],
 ['01200', 'agony', '10020', 'china'],
 ['01200', 'agony', '22020', 'aging'],
 ['01200', 'agony', '21010', 'align'],
 ['01200', 'agony', '20010', 'avian'],
 ['01200', 'agony', '20000', 'axial'],
 ['01200', 'agony', '20100', 'axiom'],
 ['01200', 'agony', '20002', 'amity'],
 ['01200', 'agony', '10100', 'voila'],
 ['01200', 'agony', '20110', 'axion'],
 ['01200', 'agony', '10000', 'iliac'],
 ['01200', 'agony', '21020', 'aping'],
 ['01201', 'alien', '22222', 'alien'],
 ['01202', 'alkyd', '20000', 'anime'],
 ['01202', 'alkyd', '22000', 'alive'],
 ['01202', 'alkyd', '22100', 'alike'],
 ['01202', 'alkyd', '20001', 'abide'],
 ['01202', 'alkyd', '21000', 'agile'],
 ['01212', 'aside', '22222', 'aside'],
 ['01220', 'amiss', '22222', 'amiss'],
 ['02000', 'culty', '10010', 'blimp'],
 ['02000', 'culty', '00002', 'deign'],
 ['02000', 'culty', '20000', 'canon'],
 ['02000', 'culty', '00000', 'dogma'],
 ['02000', 'culty', '20010', 'catch'],
 ['02000', 'culty', '20002', 'adhan'],
 ['02000', 'culty', '01110', 'fault'],
 ['02000', 'culty', '01100', 'laugh'],
 ['02000', 'culty', '10002', 'fancy'],
 ['02000', 'culty', '20100', 'canal'],
 ['02000', 'culty', '00100', 'above'],
 ['02000', 'culty', '00110', 'fatal'],
 ['02000', 'culty', '10011', 'yacht'],
 ['02000', 'culty', '10000', 'abbot'],
 ['02000', 'culty', '00022', 'bebop'],
 ['02000', 'culty', '00102', 'admin'],
 ['02000', 'culty', '00010', 'abbas'],
 ['02000', 'culty', '00001', 'kayak'],
 ['02000', 'culty', '01000', 'fauna'],
 ['02000', 'culty', '01010', 'dough'],
 ['02000', 'culty', '00212', 'tally'],
 ['02000', 'culty', '10110', 'latch'],
 ['02000', 'culty', '00220', 'waltz'],
 ['02000', 'culty', '01001', 'bayou'],
 ['02000', 'culty', '00210', 'talon'],
 ['02000', 'culty', '00012', 'banal'],
 ['02000', 'culty', '10012', 'tacky'],
 ['02000', 'culty', '01002', 'gaudy'],
 ['02000', 'culty', '21100', 'caulk'],
 ['02000', 'culty', '00202', 'balmy'],
 ['02000', 'culty', '20022', 'catty'],
 ['02000', 'culty', '21010', 'caput'],
 ['02001', 'notch', '10100', 'taken'],
 ['02001', 'notch', '10000', 'abled'],
 ['02001', 'notch', '00000', 'apgar'],
 ['02001', 'notch', '00010', 'camel'],
 ['02001', 'notch', '10001', 'haven'],
 ['02001', 'notch', '10200', 'eaten'],
 ['02001', 'notch', '00001', 'hazel'],
 ['02001', 'notch', '00100', 'valet'],
 ['02001', 'notch', '00110', 'cadet'],
 ['02001', 'notch', '01010', 'cameo'],
 ['02001', 'notch', '20000', 'navel'],
 ['02001', 'notch', '00200', 'matey'],
 ['02001', 'notch', '11000', 'oaken'],
 ['02002', 'gulch', '01200', 'value'],
 ['02002', 'gulch', '00100', 'ambit'],
 ['02002', 'gulch', '00110', 'cable'],
 ['02002', 'gulch', '00000', 'maybe'],
 ['02002', 'gulch', '00020', 'dance'],
 ['02002', 'gulch', '00011', 'cache'],
 ['02002', 'gulch', '10100', 'eagle'],
 ['02002', 'gulch', '00200', 'valve'],
 ['02002', 'gulch', '21000', 'gauge'],
 ['02002', 'gulch', '10000', 'badge'],
 ['02002', 'gulch', '00120', 'lance'],
 ['02002', 'gulch', '00010', 'canoe'],
 ['02002', 'gulch', '11000', 'vague'],
 ['02002', 'gulch', '01001', 'haute'],
 ['02002', 'gulch', '00101', 'lathe'],
 ['02002', 'gulch', '01000', 'mauve'],
 ['02002', 'gulch', '00001', 'bathe'],
 ['02002', 'gulch', '00201', 'halve'],
 ['02002', 'gulch', '20000', 'gaffe'],
 ['02010', 'tolan', '01212', 'salon'],
 ['02010', 'tolan', '00011', 'sandy'],
 ['02010', 'tolan', '01012', 'mason'],
 ['02010', 'tolan', '00220', 'salad'],
 ['02010', 'tolan', '10011', 'nasty'],
 ['02010', 'tolan', '00210', 'sally'],
 ['02010', 'tolan', '10010', 'aargh'],
 ['02010', 'tolan', '00110', 'sadly'],
 ['02010', 'tolan', '20010', 'tasty'],
 ['02010', 'tolan', '00010', 'apace'],
 ['02010', 'tolan', '00121', 'nasal'],
 ['02010', 'tolan', '00120', 'basal'],
 ['02010', 'tolan', '01010', 'savoy'],
 ['02010', 'tolan', '10210', 'salty'],
 ['02010', 'tolan', '01210', 'salvo'],
 ['02011', 'easel', '22222', 'easel'],
 ['02012', 'butch', '00100', 'attap'],
 ['02012', 'butch', '01020', 'sauce'],
 ['02012', 'butch', '00110', 'caste'],
 ['02012', 'butch', '00101', 'haste'],
 ['02012', 'butch', '01100', 'saute'],
 ['02012', 'butch', '00000', 'salve'],
 ['02012', 'butch', '20100', 'baste'],
 ['02020', 'lasts', '12101', 'salsa'],
 ['02020', 'lasts', '02201', 'sassy'],
 ['02020', 'lasts', '12100', 'palsy'],
 ['02020', 'lasts', '02110', 'patsy'],
 ['02020', 'lasts', '22200', 'lasso'],
 ['02020', 'lasts', '02100', 'pansy'],
 ['02020', 'lasts', '02200', 'gassy'],
 ['02022', 'ample', '10002', 'cause'],
 ['02022', 'ample', '10012', 'false'],
 ['02022', 'ample', '10102', 'pause'],
 ['02022', 'ample', '10212', 'lapse'],
 ['02022', 'ample', '11002', 'masse'],
 ['02100', 'clint', '01100', 'valid'],
 ['02100', 'clint', '10100', 'magic'],
 ['02100', 'clint', '20110', 'cabin'],
 ['02100', 'clint', '00101', 'patio'],
 ['02100', 'clint', '10110', 'panic'],
 ['02100', 'clint', '00102', 'habit'],
 ['02100', 'clint', '00100', 'abram'],
 ['02100', 'clint', '00110', 'mania'],
 ['02100', 'clint', '10102', 'tacit'],
 ['02100', 'clint', '20101', 'cacti'],
 ['02100', 'clint', '21100', 'cavil'],
 ['02110', 'blocs', '20011', 'basic'],
 ['02110', 'blocs', '20002', 'basis'],
 ['02110', 'blocs', '20001', 'basin'],
 ['02110', 'blocs', '00001', 'satin'],
 ['02110', 'blocs', '21001', 'basil'],
 ['02200', 'adapt', '11000', 'daily'],
 ['02200', 'adapt', '10001', 'faith'],
 ['02200', 'adapt', '10012', 'paint'],
 ['02200', 'adapt', '10002', 'faint'],
 ['02200', 'adapt', '10000', 'gaily'],
 ['02202', 'admin', '10110', 'maize'],
 ['02202', 'admin', '10011', 'naive'],
 ['02202', 'admin', '10010', 'waive'],
 ['02210', 'saint', '22222', 'saint'],
 ['02220', 'daisy', '22222', 'daisy'],
 ['02220', 'daisy', '02220', 'waist'],
 ['10000', 'count', '02000', 'dimly'],
 ['10000', 'count', '01100', 'fjord'],
 ['10000', 'count', '02100', 'forum'],
 ['10000', 'count', '02011', 'north'],
 ['10000', 'count', '22202', 'court'],
 ['10000', 'count', '22000', 'color'],
 ['10000', 'count', '01022', 'front'],
 ['10000', 'count', '01010', 'badge'],
 ['10000', 'count', '01000', 'blood'],
 ['10000', 'count', '01020', 'adapt'],
 ['10000', 'count', '02001', 'chafe'],
 ['10000', 'count', '00201', 'ahold'],
 ['10000', 'count', '10201', 'truck'],
 ['10000', 'count', '11100', 'occur'],
 ['10000', 'count', '02010', 'aargh'],
 ['10000', 'count', '21010', 'crown'],
 ['10000', 'count', '01001', 'abhor'],
 ['10000', 'count', '02020', 'horny'],
 ['10000', 'count', '21000', 'ardor'],
 ['10000', 'count', '00221', 'trunk'],
 ['10000', 'count', '00220', 'drunk'],
 ['10000', 'count', '01101', 'abram'],
 ['10000', 'count', '01102', 'trout'],
 ['10000', 'count', '20100', 'alarm'],
 ['10000', 'count', '00100', 'bekah'],
 ['10000', 'count', '12000', 'porch'],
 ['10000', 'count', '12001', 'torch'],
 ['10000', 'count', '00122', 'burnt'],
 ['10000', 'count', '20002', 'crypt'],
 ['10000', 'count', '01011', 'thorn'],
 ['10000', 'count', '02210', 'mourn'],
 ['10000', 'count', '00200', 'blurb'],
 ['10000', 'count', '20210', 'churn'],
 ['10000', 'count', '20200', 'crumb'],
 ['10000', 'count', '00222', 'grunt'],
 ['10000', 'count', '02200', 'gourd'],
 ['10000', 'count', '22020', 'corny'],
 ['10000', 'count', '00000', 'abled'],
 ['10000', 'count', '10100', 'lurch'],
 ['10000', 'count', '11000', 'frock'],
 ['10000', 'count', '21100', 'croup'],
 ['10000', 'count', '21020', 'crony'],
 ['10000', 'count', '00202', 'blurt'],
 ['10000', 'count', '00101', 'thrum'],
 ['10001', 'outed', '20120', 'other'],
 ['10001', 'outed', '20021', 'addle'],
 ['10001', 'outed', '01021', 'under'],
 ['10001', 'outed', '10020', 'chowk'],
 ['10001', 'outed', '00010', 'emery'],
 ['10001', 'outed', '00020', 'pence'],
 ['10001', 'outed', '20020', 'offer'],
 ['10001', 'outed', '00220', 'enter'],
 ['10001', 'outed', '10010', 'error'],
 ['10001', 'outed', '00210', 'entry'],
 ['10001', 'outed', '02020', 'abele'],
 ['10001', 'outed', '01020', 'alack'],
 ['10001', 'outed', '02010', 'query'],
 ['10001', 'outed', '10120', 'tower'],
 ['10001', 'outed', '10210', 'metro'],
 ['10001', 'outed', '00112', 'trend'],
 ['10001', 'outed', '10011', 'decor'],
 ['10001', 'outed', '22220', 'outer'],
 ['10001', 'outed', '00022', 'abaca'],
 ['10001', 'outed', '00021', 'abele'],
 ['10001', 'outed', '00011', 'abaca'],
 ['10001', 'outed', '10220', 'voter'],
 ['10001', 'outed', '00120', 'aargh'],
 ['10001', 'outed', '10110', 'tenor'],
 ['10001', 'outed', '01220', 'utter'],
 ['10001', 'outed', '00110', 'abaca'],
 ['10001', 'outed', '00221', 'deter'],
 ['10001', 'outed', '20220', 'otter'],
 ['10001', 'outed', '20110', 'overt'],
 ['10001', 'outed', '01010', 'femur'],
 ['10001', 'outed', '01110', 'erupt'],
 ['10001', 'outed', '01120', 'truer'],
 ['10001', 'outed', '02120', 'tuber'],
 ['10001', 'outed', '01011', 'demur'],
 ['10002', 'prong', '01000', 'arete'],
 ['10002', 'prong', '01100', 'acrid'],
 ['10002', 'prong', '02200', 'debts'],
 ['10002', 'prong', '01011', 'genre'],
 ['10002', 'prong', '22200', 'prove'],
 ['10002', 'prong', '02201', 'grove'],
 ['10002', 'prong', '01010', 'nerve'],
 ['10002', 'prong', '01001', 'merge'],
 ['10002', 'prong', '01101', 'forge'],
 ['10002', 'prong', '02000', 'abets'],
 ['10002', 'prong', '01120', 'borne'],
 ['10002', 'prong', '22220', 'prone'],
 ['10002', 'prong', '21001', 'purge'],
 ['10002', 'prong', '12000', 'crepe'],
 ['10002', 'prong', '02220', 'drone'],
 ['10002', 'prong', '22020', 'prune'],
 ['10002', 'prong', '01200', 'chore'],
 ['10002', 'prong', '21000', 'puree'],
 ['10002', 'prong', '12201', 'grope'],
 ['10002', 'prong', '12200', 'trope'],
 ['10002', 'prong', '22000', 'prude'],
 ['10010', 'count', '01001', 'alkyd'],
 ['10010', 'count', '01002', 'short'],
 ['10010', 'count', '02000', 'sorry'],
 ['10010', 'count', '01000', 'sword'],
 ['10010', 'count', '00100', 'abbey'],
 ['10010', 'count', '10100', 'scrub'],
 ['10010', 'count', '01010', 'sworn'],
 ['10010', 'count', '01012', 'snort'],
 ['10010', 'count', '00102', 'strut'],
 ['10010', 'count', '11010', 'scorn'],
 ['10010', 'count', '02101', 'torus'],
 ['10010', 'count', '00200', 'slurp'],
 ['10010', 'count', '11100', 'scour'],
 ['10010', 'count', '00202', 'spurt'],
 ['10010', 'count', '00210', 'spurn'],
 ['10011', 'sheep', '20021', 'super'],
 ['10011', 'sheep', '20020', 'abhor'],
 ['10011', 'sheep', '22220', 'sheer'],
 ['10011', 'sheep', '20201', 'sperm'],
 ['10011', 'sheep', '20100', 'serum'],
 ['10011', 'sheep', '20120', 'sewer'],
 ['10011', 'sheep', '11020', 'usher'],
 ['10011', 'sheep', '20200', 'stern'],
 ['10011', 'sheep', '10020', 'loser'],
 ['10011', 'sheep', '20220', 'steer'],
 ['10011', 'sheep', '10120', 'ester'],
 ['10011', 'sheep', '10021', 'poser'],
 ['10011', 'sheep', '22020', 'shrew'],
 ['10012', 'perch', '01100', 'actin'],
 ['10012', 'perch', '01110', 'score'],
 ['10012', 'perch', '02200', 'serve'],
 ['10012', 'perch', '01101', 'shore'],
 ['10012', 'perch', '01200', 'surge'],
 ['10012', 'perch', '11200', 'spree'],
 ['10012', 'perch', '11100', 'spore'],
 ['10012', 'perch', '01210', 'scree'],
 ['10020', 'count', '21000', 'cross'],
 ['10020', 'count', '00202', 'trust'],
 ['10020', 'count', '02002', 'worst'],
 ['10020', 'count', '01000', 'gross'],
 ['10020', 'count', '00200', 'brush'],
 ['10020', 'count', '01002', 'frost'],
 ['10020', 'count', '00102', 'burst'],
 ['10020', 'count', '20200', 'crush'],
 ['10020', 'count', '20202', 'crust'],
 ['10020', 'count', '02001', 'torso'],
 ['10020', 'count', '00201', 'truss'],
 ['10020', 'count', '00002', 'tryst'],
 ['10021', 'chops', '00012', 'press'],
 ['10021', 'chops', '01001', 'fresh'],
 ['10021', 'chops', '00002', 'dress'],
 ['10021', 'chops', '20001', 'crest'],
 ['10021', 'chops', '00101', 'verso'],
 ['10021', 'chops', '20002', 'cress'],
 ['10021', 'chops', '00001', 'wrest'],
 ['10022', 'count', '02000', 'horse'],
 ['10022', 'count', '00110', 'nurse'],
 ['10022', 'count', '00000', 'verse'],
 ['10022', 'count', '00100', 'purse'],
 ['10022', 'count', '20100', 'curse'],
 ['10022', 'count', '01000', 'prose'],
 ['10022', 'count', '00001', 'terse'],
 ['10100', 'bunty', '20020', 'birth'],
 ['10100', 'bunty', '00200', 'minor'],
 ['10100', 'bunty', '01010', 'fruit'],
 ['10100', 'bunty', '00000', 'aargh'],
 ['10100', 'bunty', '00022', 'dirty'],
 ['10100', 'bunty', '00110', 'intro'],
 ['10100', 'bunty', '00002', 'ivory'],
 ['10100', 'bunty', '00001', 'lyric'],
 ['10100', 'bunty', '10010', 'orbit'],
 ['10100', 'bunty', '20000', 'birch'],
 ['10100', 'bunty', '00102', 'irony'],
 ['10100', 'bunty', '01100', 'incur'],
 ['10100', 'bunty', '01000', 'druid'],
 ['10100', 'bunty', '00100', 'groin'],
 ['10100', 'bunty', '00010', 'droit'],
 ['10100', 'bunty', '02000', 'curio'],
 ['10100', 'bunty', '00020', 'girth'],
 ['10101', 'fined', '01010', 'aarti'],
 ['10101', 'fined', '01220', 'inner'],
 ['10101', 'fined', '22020', 'abele'],
 ['10101', 'fined', '01120', 'inter'],
 ['10101', 'fined', '02020', 'pigmy'],
 ['10101', 'fined', '02021', 'caddy'],
 ['10101', 'fined', '02220', 'liner'],
 ['10101', 'fined', '02221', 'diner'],
 ['10101', 'fined', '22010', 'fiery'],
 ['10101', 'fined', '22220', 'finer'],
 ['10101', 'fined', '02120', 'nicer'],
 ['10101', 'fined', '11120', 'infer'],
 ['10101', 'fined', '01110', 'inert'],
 ['10101', 'fined', '01021', 'idler'],
 ['10102', 'aargh', '00100', 'fibre'],
 ['10102', 'aargh', '00200', 'eerie'],
 ['10102', 'aargh', '00220', 'dirge'],
 ['10110', 'about', '00020', 'virus'],
 ['10110', 'about', '00001', 'strip'],
 ['10110', 'about', '00100', 'visor'],
 ['10110', 'about', '00000', 'sprig'],
 ['10111', 'admin', '00020', 'serif'],
 ['10111', 'admin', '00012', 'siren'],
 ['10111', 'admin', '00010', 'wiser'],
 ['10111', 'admin', '00110', 'miser'],
 ['10120', 'first', '22222', 'first'],
 ['10200', 'plunk', '20020', 'print'],
 ['10200', 'plunk', '00000', 'abort'],
 ['10200', 'plunk', '20000', 'abbot'],
 ['10200', 'plunk', '00020', 'badge'],
 ['10200', 'plunk', '00022', 'drink'],
 ['10200', 'plunk', '01000', 'draft'],
 ['10200', 'plunk', '00002', 'abate'],
 ['10200', 'plunk', '02000', 'flirt'],
 ['10200', 'plunk', '20002', 'prick'],
 ['10200', 'plunk', '10000', 'crimp'],
 ['10200', 'plunk', '00102', 'quirk'],
 ['10200', 'plunk', '01001', 'krill'],
 ['10201', 'decaf', '11000', 'tried'],
 ['10201', 'decaf', '01002', 'brief'],
 ['10201', 'decaf', '12000', 'weird'],
 ...]

# new_list_3 = []

# for guesspattern in new_list_two:
#     # print(guesspattern)
#     mask = (good_paths['Pattern']==guesspattern[0])&(good_paths['Guess_2']==guesspattern[1])&(good_paths['Pattern_2']==guesspattern[2])&(good_paths['Guess_3']==guesspattern[3])
#     for pattern in good_paths.loc[mask]['Pattern_3'].unique():
#         mask_1 = (good_paths['Pattern']==guesspattern[0])&(good_paths['Guess_2']==guesspattern[1])
#         mask_2 = (good_paths['Pattern_2']==guesspattern[2])&(good_paths['Guess_3']==guesspattern[3])&(good_paths['Pattern_3']==pattern)
#         mask_3 = mask_1 & mask_2
#         mask_4 = nextStart['Solution'].isin(good_paths.loc[mask_3]['Solution'].unique())
#         gsp = nextStart.loc[mask_4]
#         # print(pattern + ' ' + good_guess(gsp))
#         item = guesspattern + [pattern,good_guess(gsp)]
#         new_list_3.append(item)

# new_list_3

# good_paths = good_paths[['Guess','Solution','Pattern','Guess_2_x','Pattern_2_x','Guess_3','Pattern_3']].rename(columns={'Guess_2_x':'Guess_2','Pattern_2_x':'Pattern_2'}).drop_duplicates()
# gg_df = pd.DataFrame(new_list_3,columns=['Pattern','Guess_2','Pattern_2','Guess_3','Pattern_3','Guess_4'])
# good_paths = good_paths.merge(gg_df,how='inner')
# good_paths = good_paths.merge(gsPairs.rename(columns={'Guess':'Guess_4','Pattern':'Pattern_4'}))
# good_paths.head(50)
# len(good_paths)
# len(good_paths.loc[good_paths['Pattern_4']=='22222'])
good_paths.loc[good_paths['Pattern_4']!='22222']

# print(len(good_paths.loc[good_paths['Pattern_4']!='22222']))
# print(len(good_paths.loc[good_paths['Pattern_4']!='22222'][['Guess_4','Pattern_4']].drop_duplicates()))

p='22222'
x=len(good_paths.loc[good_paths['Pattern']==p])
y=x
for i in range(2,5):
    x+=(len(good_paths.loc[good_paths['Pattern_'+str(i)]==p])-y)*i
    y+=len(good_paths.loc[good_paths['Pattern_'+str(i)]==p])-y

x+=(len(good_paths)-y)*6
x=x/len(good_paths)
print(y)
print(x)

2238
3.5965442764578834

## may be room to optimize last step. other option is gE.loc[gE['Expected group'].idxmax()]['Guess'].values[0]. Other option to change the loc statement to boolean test for max.

# %timeit -n 1000 Guesses['Guess'].iloc[Guesses['Expected group'].idxmin()] #37 mu s
# %timeit -n 1000 Guesses.loc[Guesses['Expected group'].idxmax()]['Guess'] #70 mu s

## Create the pivot table to choose optimal guess

# nextStart = gsPairs.merge(solution_list,how='inner',on='Solution')

# guessEvaluator = nextStart.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))
# guessEvaluator = guessEvaluator.fillna(0)
# guessEvaluator = pd.DataFrame(guessEvaluator.to_records())

# Guesses = guessEvaluator.groupby('Guess')['Solution'].count().reset_index().rename(columns = {'Guess':'Guess','Solution':'# groups'}).merge(
#     guessEvaluator.groupby('Guess')['Solution'].mean().reset_index().rename(columns = {'Guess':'Guess','Solution':'Avg group'}),how='inner',on='Guess').merge(
#     guessEvaluator.groupby('Guess')['Solution'].max().reset_index().rename(columns = {'Guess':'Guess','Solution':'Max group'}),how='inner',on='Guess').merge(
#     guessEvaluator.groupby('Guess')['Solution'].median().reset_index().rename(columns = {'Guess':'Guess','Solution':'Median group'}),how='inner',on='Guess')

# Guesses = Guesses.sort_values(by=['# groups','Max group','Avg group','Median group'],ascending=[False,True,True,True]).reset_index().drop('index',axis=1)
# Guesses = Guesses.sort_values(by=['Avg group'],ascending=[True]).reset_index().drop('index',axis=1)

# print(len(Guesses))
# print(len(nextStart['Solution'].drop_duplicates()))

# guessEvaluator.loc[guessEvaluator['Guess']=='leant'].sort_values(by=['Solution','Pattern'],ascending=[False,True])
# Guesses
# Guesses.loc[Guesses['Guess']=='crane']
# Guesses.loc[Guesses['Guess'].isin(nextStart['Solution'])]
# Guesses = Guesses.loc[Guesses['Guess'].isin(nextStart['Solution'].drop_duplicates())]

nextStart = gsPairs.merge(solution_list,how='inner',on='Solution')

def opt_guess(gsp_df,ns):
    if len(gsp_df['Solution'].unique())<2:
        return gsp_df['Solution'].iloc[0]
    else:
        gE = gsp_df.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))

        gE['Determined'] = gE['Solution'].where(gE['Solution'] < 2, other=0)
        gE.fillna(0)
        gE = pd.DataFrame(gE.to_records())
        gsp_df = gsp_df.merge(ns).merge(gE.drop(['Solution'],axis=1))
        gE_sum = gsp_df.groupby('Guess')['Determined'].sum().reset_index()
        gg = gE_sum['Guess'].iloc[gE_sum['Determined'].idxmax()]
        mask = (gsp_df['Guess']==gg) & (~gsp_df['Pattern'].isin(gE.loc[(gE['Guess']==gg)&(gE['Determined']>0)]['Pattern']))
        new_solutions = gsp_df.loc[mask & gsp_df['Solution'].isin(ns)][['Solution']].drop_duplicates()

        return (gg,new_solutions)

gsp_df = nextStart

gE = gsp_df.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))
gE.fillna(0)
gE = pd.DataFrame(gE.to_records())
gE['Determined'] = gE['Solution'].where(gE['Solution'] < 2, other=0)
max_group_size = 11
for i in range(3,max_group_size):
    gE['Group < '+str(i)] = 1
    gE['Group < '+str(i)] = gE['Group < '+str(i)].where(gE['Solution'] < i, other=0)

gsp_df = gsp_df.merge(gE.drop(['Solution'],axis=1))
# nextStart = nextStart.merge(gE.drop(['Solution'],axis=1))

# threshold = 9

# guess_lengths = []
# new_solutions = gsp_df[['Solution']].drop_duplicates()
# old_length = len(new_solutions)
# mask_0 = gsp_df['Solution'].isin(new_solutions['Solution'])
# covered = gsp_df.loc[mask_0].groupby('Guess')['Determined'].sum().max()

# while len(new_solutions)>0 and covered > threshold:
#     old_length = len(new_solutions)
#     mask_0 = gsp_df['Solution'].isin(new_solutions['Solution'])
#     gE_sum = gsp_df.loc[mask_0].groupby('Guess')['Determined'].sum().reset_index()
#     gg = gE_sum['Guess'].iloc[gE_sum['Determined'].idxmax()]

#     mask_1 = (gE['Guess']==gg)&(gE['Determined']>0) # to filter gE pivot for the patterns which determine solutions for good guess gg
#     mask_2 = gsp_df['Pattern'].isin(gE.loc[mask_1]['Pattern']) # to identify patterns in gsp_df from mask 1
#     mask_3 = (gsp_df['Guess']==gg) & (~mask_2) # to remove the solutions corresponding to these patterns filter on good guess and all other patterns
#     mask_4 = gsp_df['Solution'].isin(new_solutions['Solution']) 
#     new_solutions = gsp_df.loc[mask_3 & mask_4][['Solution']].drop_duplicates()
#     covered = old_length - len(new_solutions)
#     # gsp_df = gsp_df.merge(new_solutions) ## removing this in favor of filter at beginning of loop. should be faster and keeps gsp_df in memory

#     guess_lengths.append([gg,covered,1])


# for i in range(3,max_group_size):
#     if len(new_solutions)>0:
#         mask_0 = gsp_df['Solution'].isin(new_solutions['Solution'])
#         covered = gsp_df.loc[mask_0].groupby('Guess')['Group < '+str(i)].sum().max()
    
#     while len(new_solutions)>0 and covered > threshold:
#         old_length = len(new_solutions)
#         mask_0 = gsp_df['Solution'].isin(new_solutions['Solution'])
#         gE_sum = gsp_df.loc[mask_0].groupby('Guess')['Group < '+str(i)].sum().reset_index()
#         gg = gE_sum['Guess'].iloc[gE_sum['Group < '+str(i)].idxmax()]

#         mask_1 = (gE['Guess']==gg)&(gE['Group < '+str(i)]>0) # to filter gE pivot for the patterns which determine solutions for good guess gg
#         mask_2 = gsp_df['Pattern'].isin(gE.loc[mask_1]['Pattern']) # to identify patterns in gsp_df from mask 1
#         mask_3 = (gsp_df['Guess']==gg) & (~mask_2) # to remove the solutions corresponding to these patterns filter on good guess and all other patterns
#         mask_4 = gsp_df['Solution'].isin(new_solutions['Solution']) 
#         new_solutions = gsp_df.loc[mask_3 & mask_4][['Solution']].drop_duplicates()
#         covered = old_length - len(new_solutions)
#         # gsp_df = gsp_df.merge(new_solutions) ## removing this in favor of filter at beginning of loop. should be faster and keeps gsp_df in memory

#         guess_lengths.append([gg,covered,i-1])

## latest is 9m 6s to run with all the larger groups.

## 4m 30s to run the loop and catalogue the guesses. Something wrong w first guess because seems to reduce by too much # of solns.
## of this 25s or so is just the up-front part of pivoting. In future want to just filter gsp without re-defining.
## Can also immediately filter on 'determined' column is 1 which will speed up.

guess_lengths
# new_solutions

# guess_lengths = pd.DataFrame(guess_lengths,columns=['Guess','Covered','Max group size'])
# writer = pd.ExcelWriter(r"1. IO files/Good_guesses_v2.xlsx",engine='xlsxwriter')
# guess_lengths.to_excel(writer, sheet_name='Guesses', index=False)
# new_solutions.to_excel(writer, sheet_name='Uncovered solutions', index=False)
# writer.close()

# guess_lengths = pd.read_excel(r"1. IO files/Good_guesses_v2.xlsx",sheet_name='Guesses',dtype={'Guess': str, 'Covered': int, 'Max group size': int})
# new_solutions = pd.read_excel(r"1. IO files/Good_guesses_v2.xlsx",sheet_name='Uncovered solutions',dtype={'Solution': str})

len(Guesses[Guesses['Expected group']<95])

227

## Infeasible to test all two-guess paths. Best we can do for now is identify a subset and choose.
## takes about 10-11min with a size 227 starting guess_list

# max_exp_group = 95

# mask_1 = nextStart['Guess'].isin(Guesses[Guesses['Expected group']<max_exp_group]['Guess'].unique())
# ggsp_df = nextStart.loc[mask_1].merge(nextStart[mask_1].rename(columns={'Guess':'Guess_2','Pattern':'Pattern_2'}))
# ggsp_df.head()
# len(ggsp_df)

# ggE = ggsp_df.pivot_table(index=['Guess','Pattern','Guess_2','Pattern_2'],values='Solution',aggfunc=lambda x: len(x.unique()))
# ggE.fillna(0)
# ggE = pd.DataFrame(ggE.to_records())

# max_group_size = 21
# col_list = []

# for i in range(2,max_group_size):
#     col_name = 'Group < '+str(i)
#     ggE[col_name] = 1
#     ggE[col_name] = ggE[col_name].where(ggE['Solution'] < i, other=0)
#     col_list.append(col_name)

# ggE.head(60)
# col_list

# ggE_1 = ggE.pivot_table(index=['Guess','Pattern','Guess_2'],values=['Solution'] + col_list,aggfunc='sum')
# # ggE_1 = ggE.pivot_table(index=['Guess','Pattern','Guess_2'],values=['Pattern_2','Solution'] + col_list,aggfunc={'Pattern_2':lambda x: len(x.unique()),'Solution':'sum'}) # in case want pattern count

# ggE_1.fillna(0)
# ggE_1 = pd.DataFrame(ggE_1.to_records())
# ggE_1 = ggE_1.sort_values(by=['Guess','Pattern','Group < 2'],ascending=[True,True,False]).reset_index().drop(columns='index')

# ggE_2 = ggE_1.pivot_table(index=['Guess','Pattern'],values=col_list + ['Solution'], aggfunc='max')
# ggE_2.fillna(0)
# ggE_2 = pd.DataFrame(ggE_2.to_records())

# ggE_3 = ggE_2.pivot_table(index=['Guess'],values=col_list + ['Solution'], aggfunc='sum')
# ggE_3.fillna(0)
# ggE_3 = pd.DataFrame(ggE_3.to_records())

# ggE_2 = ggE_2.sort_values(by=['Guess','Solution'],ascending=[True,False]).reset_index().drop(columns='index')

# prim_sort = 'Group < 20'
# ggE_3 = ggE_3.sort_values(by=[prim_sort] + col_list,ascending=False).reset_index().drop(columns='index')

# ggE_1.head(20)
# ggE_2.head(50)
ggE_3.head(60)
# len(ggE_3)

writer = pd.ExcelWriter(r"1. IO files/GG_values.xlsx",engine='xlsxwriter')
ggE_3.to_excel(writer, sheet_name='Guesses outcomes', index=False)
writer.close()

# guess_lengths
# len(guess_lengths)
# import xlsxwriter

# guess_lengths = pd.DataFrame(guess_lengths,columns=['Guess','Remaining Solutions'])

# writer = pd.ExcelWriter(r"1. IO files/Good_guesses_-1.xlsx",engine='xlsxwriter')
# guess_lengths.to_excel(writer, sheet_name='Guesses', index=False)
# writer.close()

# gsp_df = nextStart

# gE = gsp_df.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))
# gE['Determined'] = gE['Solution'].where(gE['Solution'] < 2, other=0)
# gE.fillna(0)
# gE = pd.DataFrame(gE.to_records())

# gsp_df = gsp_df.merge(gE.drop(['Solution'],axis=1))

# len(gsp_df['Solution'].unique())
# guess_lengths = guess_lengths.reset_index()
# guess_lengths

# filter_1 = gsp_df['Determined']==1
# filter_2 = gsp_df['Guess'].isin(guess_lengths['Guess'])
# filter_3 = filter_1 & filter_2
# path_1 = gsp_df[filter_2].merge(guess_lengths).sort_values(by='index',ascending=True)
# len(gsp_df[filter_3]['Solution'].unique())

# path_1[path_1['Guess']=='laten']

# writer = pd.ExcelWriter(r"1. IO files/Outputs.xlsx",engine='xlsxwriter')
# path_1[path_1['Guess']=='laten'].to_excel(writer, sheet_name='Test', index=False)
# writer.close()

# print(f'{len(nextStart):,}')
# gsp_df = nextStart
# ns = solution_list

# gE = gsp_df.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))

# gE['Determined'] = gE['Solution'].where(gE['Solution'] < 2, other=0)
# gE.fillna(0)
# gE = pd.DataFrame(gE.to_records())

# gE.head(50)

ns = new_solutions
gsp_df = gsp_df.merge(ns)
gsp_df = gsp_df.merge(gE.drop(['Solution'],axis=1))
print(f'{len(gsp_df):,}')
gsp_df.head()

# gE_sum = gsp_df.groupby('Guess')['Determined'].sum().reset_index()
# gE_sum = gE_sum.sort_values(by='Determined',ascending=False).reset_index().drop('index',axis=1)
# gE_sum.head(10)

# gg = gE_sum['Guess'].iloc[gE_sum['Determined'].idxmax()]
# gg

# mask_1 = (gE['Guess']==gg)&(gE['Determined']>0)
# mask_2 = gsp_df['Pattern'].isin(gE.loc[mask_1]['Pattern'])
# mask = (gsp_df['Guess']==gg) & (~mask_2)
# mask_3 = gsp_df['Solution'].isin(ns['Solution'])
# new_solutions = gsp_df.loc[mask & mask_3][['Solution']].drop_duplicates()

# print(len(new_solutions))
# new_solutions.head()

14,724,294

print(f'{len(nextStart):,}')

15,258,165

guesses = []
lengths = []
nextStart = gsPairs.merge(solution_list)
ns = solution_list

og = opt_guess(nextStart,ns)
guesses.append(og[0])
lengths.append(len(og[1]))
ns = nextStart.merge(og[1])[['Solution']].drop_duplicates()

# while len(og[1])>0:
#     og = opt_guess(nextStart)
#     guesses.append(og[0])
#     lengths.append(len(og[1]))
#     nextStart = nextStart.merge(og[1])

for i in range(len(guesses)):
    print(guesses[i]+': '+str(lengths[i]))

## This will take a very long time. 40s for the first guess and only revealed less than 50 solutions. We need to do that more than 50 times.



# gE = nextStart.pivot_table(index=['Guess','Pattern'],values='Solution',aggfunc=lambda x: len(x.unique()))

# gE['Determined'] = gE['Solution'].where(gE['Solution'] < 2, other=0)
# gE_sum = gE.groupby('Guess')['Determined'].sum().reset_index()
# # gE_sum = gE_sum.sort_values(by=['Determined'],ascending=[False])
# # gE_sum
# mask = (nextStart['Guess']=='caron') & (nextStart['Pattern'].isin(gE.loc[gE['Determined']>0]['Pattern']))
# nextStart.loc[mask][['Solution']].drop_duplicates()

# del gE
# del gE_sum
# del mask
# guesses = []
# lengths = []
# nextStart = gsPairs.merge(solution_list)

laten: 0

# inputs = pd.read_excel(r"1. IO files\Inputs.xlsx",sheet_name='Inputs',dtype={'Person': str, 'Guess': int, 'Pattern': str, 'Date':str})
# inputs = inputs.loc[inputs['Date']==str(pd.to_datetime('today').normalize())].reset_index().drop('index',axis=1)

# guesses_df = gsPairs
# solutions_df = guesses_df
# solutions_df = gsPairs.merge(solution_list,how='inner',on='Solution')
# max_guess = inputs.Guess.max()

# for person in inputs['Person'].drop_duplicates():
#     print(person)
#     test_paths = path_solver(person,inputs,guesses_df,solutions_df,path_length=1)
#     solution_short = test_paths[['Solution']].drop_duplicates()
#     solutions_df = solutions_df.merge(solution_short,how='inner',on='Solution')
#     writer = pd.ExcelWriter(r"1. IO files\LG_"+person+".xlsx",engine='xlsxwriter')
#     test_paths.to_excel(writer, sheet_name='LG', index=False)
#     writer.close()

# for person in inputs['Person'].drop_duplicates():
#     print(person)
#     test_paths = path_solver(person,inputs,guesses_df,solutions_df,path_length=max_guess)
#     solution_short = test_paths[['Solution']].drop_duplicates()
#     solutions_df = solutions_df.merge(solution_short,how='inner',on='Solution')
#     writer = pd.ExcelWriter(r"1. IO files\Paths_"+person+".xlsx",engine='xlsxwriter')
#     test_paths.to_excel(writer, sheet_name='Paths', index=False)
#     writer.close()

# del guesses_df
# del solutions_df

writer = pd.ExcelWriter(r"1. IO files\Outputs.xlsx",engine='xlsxwriter')
solution_short.to_excel(writer, sheet_name='Solutions', index=False)
solution_short.merge(solution_list,how='inner',on='Solution').to_excel(writer, sheet_name='Short list', index=False)
writer.close()

# inputs = pd.read_excel(r"1. IO files\Inputs.xlsx",sheet_name='Inputs',dtype={'Person': str, 'Guess': int, 'Pattern': str, 'Date':str})
# inputs = inputs.loc[inputs['Date']==str(pd.to_datetime('today').normalize())].reset_index().drop('index',axis=1)

guesses_df = gsPairs
solutions_df = guesses_df
# solutions_df = gsPairs.merge(solution_list,how='inner',on='Solution')
max_guess = inputs.Guess.max()

writer = pd.ExcelWriter(r"1. IO files\Outputs.xlsx",engine='xlsxwriter')

for person in inputs['Person'].drop_duplicates():
    print(person)
    test_paths = path_solver(person,inputs,guesses_df,solutions_df,path_length=1)
    solution_short = test_paths[['Solution']].drop_duplicates()
    solutions_df = solutions_df.merge(solution_short,how='inner',on='Solution')
    # test_paths.to_excel(writer, sheet_name='LG_'+person, index=False)

for person in inputs['Person'].drop_duplicates():
    print(person)
    test_paths = path_solver(person,inputs,guesses_df,solutions_df,path_length=max_guess)
    solution_short = test_paths[['Solution']].drop_duplicates()
    solutions_df = solutions_df.merge(solution_short,how='inner',on='Solution')
    test_paths.to_excel(writer, sheet_name='Paths_'+person, index=False)

# del guesses_df
# del solutions_df

solution_short.to_excel(writer, sheet_name='Solutions', index=False)
solution_short.merge(solution_list,how='inner',on='Solution').to_excel(writer, sheet_name='Short list', index=False)
writer.close()

Shannon
Last guess = 3
02122
Alex
Last guess = 4
02222
Serena
Last guess = 3
02122
Marc
Last guess = 4
22120
Sade
Last guess = 4
02122
Shannon
Last guess = 3
02122
00120
Alex
Last guess = 4
02222
01111
11010
Serena
Last guess = 3
02122
00111
Marc
Last guess = 4
22120
00120
11000
Sade
Last guess = 4
02122
01122
01111

	Guess	# groups	Avg group	Max group	Median group
0	trace	150	15.433333	246	5.0
1	crate	148	15.641892	246	4.0
2	slate	147	15.748299	221	5.0
3	carte	146	15.856164	246	4.0
4	parse	146	15.856164	270	4.0
...	...	...	...	...	...
6586	queue	33	70.151515	942	7.0
6587	cocco	33	70.151515	1319	6.0
6588	abaya	32	72.343750	1001	12.0
6589	jazzy	31	74.677419	1111	3.0
6590	jaffa	30	77.166667	1247	5.0

	Guess	Group < 10	Group < 11	Group < 12	Group < 13	Group < 14	Group < 15	Group < 16	Group < 17	Group < 18	...	Group < 2	Group < 20	Group < 3	Group < 4	Group < 5	Group < 6	Group < 7	Group < 8	Group < 9	Solution
0	dealt	1076	1083	1086	1091	1094	1098	1100	1102	1103	...	742	1103	890	962	1008	1036	1056	1067	1072	2315
1	train	1070	1080	1086	1088	1089	1089	1090	1092	1095	...	725	1096	882	958	1000	1029	1041	1057	1067	2315
2	trail	1067	1075	1079	1081	1084	1084	1087	1088	1089	...	708	1091	871	953	998	1021	1035	1044	1052	2315
3	trans	1069	1073	1076	1077	1078	1078	1080	1085	1086	...	689	1089	850	935	984	1018	1036	1053	1065	2315
4	corse	1059	1064	1065	1072	1076	1078	1078	1082	1082	...	720	1083	874	952	997	1019	1032	1044	1051	2315
5	crise	1062	1068	1069	1070	1075	1077	1078	1079	1079	...	706	1081	880	952	992	1020	1036	1048	1055	2315
6	trice	1053	1062	1064	1068	1070	1071	1076	1078	1078	...	712	1078	868	953	999	1017	1029	1039	1047	2315
7	roast	1051	1058	1061	1066	1067	1070	1072	1075	1075	...	688	1078	851	948	981	1009	1030	1037	1046	2315
8	toile	1054	1059	1061	1066	1068	1068	1069	1070	1072	...	704	1075	865	932	977	1007	1023	1037	1046	2315
9	crone	1053	1059	1062	1065	1070	1072	1073	1073	1073	...	710	1073	861	939	978	1007	1023	1039	1048	2315
10	leant	1051	1059	1062	1066	1068	1072	1073	1073	1073	...	699	1073	862	951	989	1017	1031	1040	1047	2315
11	lance	1043	1049	1055	1055	1059	1062	1064	1066	1067	...	703	1068	844	926	976	1004	1019	1027	1037	2315
12	trine	1049	1054	1055	1059	1062	1062	1062	1063	1064	...	711	1067	852	942	982	1006	1023	1035	1044	2315
13	siren	1045	1047	1051	1054	1055	1057	1061	1062	1064	...	683	1066	857	922	959	995	1014	1024	1038	2315
14	sonar	1042	1044	1048	1048	1052	1054	1056	1059	1062	...	680	1063	833	918	960	993	1011	1022	1033	2315
15	palet	1032	1040	1045	1049	1050	1052	1054	1056	1059	...	719	1060	862	920	958	982	1003	1016	1028	2315
16	trial	1031	1040	1043	1046	1050	1050	1054	1056	1057	...	672	1058	843	923	966	987	999	1009	1019	2315
17	sitar	1031	1038	1042	1045	1047	1050	1052	1056	1057	...	672	1058	821	911	959	986	1004	1015	1026	2315
18	cries	1034	1039	1040	1042	1046	1051	1054	1055	1055	...	689	1056	851	924	958	982	1007	1018	1028	2315
19	snore	1032	1038	1042	1048	1049	1051	1052	1053	1053	...	676	1056	841	914	962	981	1006	1012	1020	2315
20	plate	1026	1030	1037	1040	1045	1047	1047	1050	1055	...	712	1055	847	919	957	983	1005	1013	1020	2315
21	noise	1031	1037	1041	1043	1044	1046	1048	1050	1051	...	687	1055	845	913	954	981	1006	1014	1024	2315
22	crane	1036	1043	1043	1045	1048	1050	1050	1050	1051	...	683	1054	836	916	956	985	1000	1016	1029	2315
23	sorel	1030	1036	1038	1043	1044	1048	1050	1053	1053	...	676	1054	823	906	953	977	994	1005	1027	2315
24	alien	1031	1038	1039	1042	1045	1048	1051	1051	1051	...	663	1053	827	910	952	986	1011	1020	1025	2315
25	tolar	1025	1035	1038	1038	1042	1044	1045	1048	1048	...	691	1050	829	910	955	978	992	1007	1016	2315
26	stone	1028	1030	1034	1037	1038	1041	1045	1047	1048	...	681	1050	832	911	952	978	993	1011	1024	2315
27	sorta	1023	1030	1033	1038	1040	1041	1045	1048	1048	...	674	1050	825	904	942	982	996	1006	1015	2315
28	rinse	1029	1035	1037	1039	1041	1041	1044	1044	1046	...	675	1049	831	905	952	981	996	1007	1019	2315
29	canoe	1026	1032	1036	1039	1040	1041	1041	1043	1045	...	700	1046	842	915	950	976	993	1003	1017	2315
30	least	1020	1026	1029	1034	1036	1036	1037	1041	1043	...	671	1044	831	907	943	971	996	1005	1013	2315
31	stair	1015	1021	1026	1028	1031	1033	1037	1038	1041	...	660	1044	810	906	946	971	989	998	1010	2315
32	solar	1014	1020	1026	1030	1030	1033	1034	1038	1040	...	662	1041	809	895	943	978	994	1000	1009	2315
33	caret	1027	1028	1034	1034	1036	1036	1038	1039	1040	...	678	1040	836	905	945	976	998	1010	1019	2315
34	toner	1014	1018	1024	1029	1032	1034	1036	1037	1038	...	675	1040	826	892	938	968	983	995	1007	2315
35	rails	1014	1021	1025	1028	1033	1035	1036	1036	1038	...	641	1040	813	898	948	975	985	994	1005	2315
36	alone	1013	1017	1019	1022	1026	1029	1031	1032	1035	...	678	1039	823	897	936	972	988	1001	1006	2315
37	aline	1018	1025	1026	1030	1032	1034	1037	1037	1037	...	662	1039	820	897	941	975	993	1005	1013	2315
38	maile	1014	1019	1021	1026	1028	1030	1032	1034	1035	...	674	1036	825	901	939	967	987	1001	1008	2315
39	tenor	1014	1018	1022	1027	1028	1028	1031	1031	1031	...	668	1035	819	887	937	967	977	995	1009	2315
40	score	1006	1010	1012	1018	1022	1025	1027	1031	1031	...	670	1032	822	895	936	963	975	987	997	2315
41	thale	1007	1010	1014	1020	1023	1027	1030	1031	1031	...	687	1031	821	892	928	956	979	990	1002	2315
42	artis	1006	1013	1018	1020	1023	1025	1027	1028	1030	...	646	1031	799	883	921	951	976	987	998	2315
43	snarl	1007	1014	1017	1020	1025	1026	1027	1027	1028	...	654	1029	800	885	926	951	972	988	1001	2315
44	lairs	999	1010	1012	1013	1019	1021	1022	1024	1026	...	647	1029	799	886	933	961	971	982	989	2315
45	thane	1009	1011	1014	1018	1021	1024	1026	1028	1028	...	685	1028	820	896	935	964	984	995	1000	2315
46	soler	1002	1007	1009	1016	1018	1021	1024	1027	1027	...	654	1028	792	873	926	954	969	979	995	2315
47	stole	1004	1008	1012	1015	1018	1022	1025	1026	1026	...	651	1028	789	879	929	957	976	984	995	2315
48	louie	1000	1003	1012	1016	1018	1018	1021	1024	1025	...	646	1028	798	872	926	952	970	988	992	2315
49	caste	1008	1011	1014	1015	1022	1022	1024	1024	1025	...	669	1027	811	896	933	961	975	989	997	2315
50	peart	1000	1002	1008	1015	1019	1023	1024	1025	1025	...	693	1026	836	894	935	962	974	988	992	2315
51	claes	1004	1006	1014	1016	1021	1023	1023	1024	1025	...	663	1026	806	887	937	960	980	987	999	2315
52	react	1003	1008	1016	1019	1020	1021	1023	1024	1025	...	662	1026	822	905	937	954	970	984	994	2315
53	scale	1001	1006	1013	1015	1020	1021	1022	1022	1023	...	666	1024	792	880	924	961	976	987	994	2315
54	siler	997	1001	1005	1012	1012	1015	1016	1019	1020	...	659	1022	804	874	924	951	965	978	992	2315
55	rance	1003	1007	1010	1011	1015	1016	1016	1016	1019	...	653	1021	796	880	923	949	967	980	991	2315
56	rials	993	1000	1006	1008	1015	1018	1018	1018	1019	...	634	1021	798	880	932	954	967	976	985	2315
57	tiles	1002	1005	1011	1011	1016	1017	1018	1019	1020	...	657	1020	799	878	917	947	967	981	993	2315
58	tries	1001	1006	1010	1014	1015	1017	1019	1020	1020	...	652	1020	800	884	924	954	970	984	992	2315
59	resit	1002	1004	1009	1010	1014	1015	1017	1018	1019	...	645	1020	804	882	929	960	979	985	992	2315

Carol: wordle backsolver¶

Creating guess solution pairs¶

Path solver - backsolving for others' results¶

Guess optimization¶

Optimal solution paths¶

Forward-looking algorithm¶

First attempt¶

Archive - pivot approach to finding individual guesses¶

Backward looking algorithm¶

Further automation¶

	Guess	Solution	Pattern
0	about	about	22222
1	about	other	00101
2	about	which	00000
3	about	their	00001
4	about	there	00001

	Guess	Solution	Pattern	Guess_2	Pattern_2	Guess_3	Pattern_3	Guess_4	Pattern_4
37	raise	flank	01000	clout	02000	panda	01100	blank	02222
40	raise	bland	01000	clout	02000	panda	01110	gland	02222
98	raise	power	10001	outed	10020	chowk	00110	alamo	00001
99	raise	lower	10001	outed	10020	chowk	00110	alamo	01001
100	raise	mower	10001	outed	10020	chowk	00110	alamo	00011
...	...	...	...	...	...	...	...	...	...
1777	raise	bawdy	02000	culty	00002	deign	10000	paddy	02022
1818	raise	tatty	02000	culty	00022	bebop	00000	fatty	02222
1835	raise	taunt	02000	culty	01010	dough	00200	jetty	00110
1836	raise	jaunt	02000	culty	01010	dough	00200	jetty	20100
1837	raise	vaunt	02000	culty	01010	dough	00200	jetty	00100

	Guess	Covered	Max group size
0	laten	41	1
1	caron	40	1
2	metro	39	1
3	piler	39	1
4	beads	38	1
...	...	...	...
124	taint	12	5
125	valor	10	5
126	plink	9	5
127	syrah	10	6
128	dolls	8	6