42#define BANDIT_NAME "exp3"
113 oneminusgamma = 1 - banditdata->gamma;
114 gammaoverk = banditdata->gamma / (
SCIP_Real)nactions;
115 weightsum = banditdata->weightsum;
116 weights = banditdata->weights;
124 for(
i = 0;
i < nactions - 1; ++
i )
129 prob = oneminusgamma * weights[
i] / weightsum + gammaoverk;
169 beta = banditdata->beta;
170 oneminusgamma = 1.0 - banditdata->gamma;
171 gammaoverk = banditdata->gamma * eta;
172 weights = banditdata->weights;
173 weightsum = banditdata->weightsum;
174 newweightsum = weightsum;
180 probai = oneminusgamma * weights[
selection] / weightsum + gammaoverk;
184 gainestim = score / probai;
186 weights[
selection] *= exp(eta * gainestim);
195 for( j = 0; j < nactions; ++j )
198 probaj = oneminusgamma * weights[j] / weightsum + gammaoverk;
204 gainestim = (score + beta) / probaj;
206 gainestim = beta / probaj;
208 weights[j] *= exp(eta * gainestim);
209 newweightsum += weights[j];
213 banditdata->weightsum = newweightsum;
231 weights = banditdata->weights;
238 if( priorities !=
NULL )
245 for(
i = 0;
i < nactions; ++
i )
248 priosum += priorities[
i];
254 normalization = nactions / priosum;
255 for(
i = 0;
i < nactions; ++
i )
256 weights[
i] = (priorities[
i] * normalization) +
NUMTOL;
261 for(
i = 0;
i < nactions; ++
i )
268 for(
i = 0;
i < nactions; ++
i )
290 unsigned int initseed
298 banditdata->gamma = gammaparam;
299 banditdata->beta = beta;
300 assert(gammaparam >= 0 && gammaparam <= 1);
301 assert(beta >= 0 && beta <= 1);
318 unsigned int initseed
344 assert(gammaparam >= 0 && gammaparam <= 1);
346 banditdata->gamma = gammaparam;
357 assert(beta >= 0 && beta <= 1);
359 banditdata->beta = beta;
370 assert(banditdata->weightsum > 0.0);
373 return (1.0 - banditdata->gamma) * banditdata->weights[action] / banditdata->weightsum + banditdata->gamma / (
SCIP_Real)
SCIPbanditGetNActions(exp3);
384 SCIPbanditFreeExp3, SCIPbanditSelectExp3, SCIPbanditUpdateExp3, SCIPbanditResetExp3) );
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
internal methods for bandit algorithms
SCIP_RETCODE SCIPbanditCreateExp3(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
SCIP_RETCODE SCIPincludeBanditvtableExp3(SCIP *scip)
internal methods for Exp.3 bandit algorithm
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)),)
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
unsigned int SCIPinitializeRandomSeed(SCIP *scip, unsigned int initialseedvalue)
assert(minobj< SCIPgetCutoffbound(scip))
#define BMSfreeBlockMemory(mem, ptr)
#define BMSallocBlockMemory(mem, ptr)
#define BMSallocBlockMemoryArray(mem, ptr, num)
#define BMSfreeBlockMemoryArray(mem, ptr, num)
struct BMS_BufMem BMS_BUFMEM
struct BMS_BlkMem BMS_BLKMEM
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
public methods for bandit algorithms
public methods for message output
public data structures and miscellaneous methods
public methods for bandit algorithms
public methods for memory management
public methods for random numbers
#define SCIP_DECL_BANDITUPDATE(x)
#define SCIP_DECL_BANDITFREE(x)
struct SCIP_Bandit SCIP_BANDIT
struct SCIP_BanditData SCIP_BANDITDATA
#define SCIP_DECL_BANDITSELECT(x)
struct SCIP_BanditVTable SCIP_BANDITVTABLE
#define SCIP_DECL_BANDITRESET(x)
struct SCIP_RandNumGen SCIP_RANDNUMGEN
enum SCIP_Retcode SCIP_RETCODE