-
Notifications
You must be signed in to change notification settings - Fork 489
Cleanup recovery code #6261
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Cleanup recovery code #6261
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -222,8 +222,8 @@ protected TabletServer(ServerOpts opts, | |
| super(ServerId.Type.TABLET_SERVER, opts, serverContextFactory, args); | ||
| context = super.getContext(); | ||
| final AccumuloConfiguration aconf = getConfiguration(); | ||
| log.info("Version " + Constants.VERSION); | ||
| log.info("Instance " + getInstanceID()); | ||
| log.info("Version {}", Constants.VERSION); | ||
| log.info("Instance {}", getInstanceID()); | ||
| this.sessionManager = new SessionManager(context); | ||
| this.logSorter = new LogSorter(this); | ||
| this.statsKeeper = new TabletStatsKeeper(); | ||
|
|
@@ -423,7 +423,7 @@ private HostAndPort getManagerAddress() { | |
| } | ||
| return HostAndPort.fromString(managers.iterator().next().toHostPortString()); | ||
| } catch (Exception e) { | ||
| log.warn("Failed to obtain manager host " + e); | ||
| log.warn("Failed to obtain manager host", e); | ||
| } | ||
|
|
||
| return null; | ||
|
|
@@ -438,7 +438,7 @@ private ManagerClientService.Client managerConnection(HostAndPort address) { | |
| // log.info("Listener API to manager has been opened"); | ||
| return ThriftUtil.getClient(ThriftClientTypes.MANAGER, address, getContext()); | ||
| } catch (Exception e) { | ||
| log.warn("Issue with managerConnection (" + address + ") " + e, e); | ||
| log.warn("Issue with managerConnection ({}) {}", address, e, e); | ||
| } | ||
| return null; | ||
| } | ||
|
|
@@ -735,7 +735,7 @@ public TServerInstance getTabletSession() { | |
| try { | ||
| return new TServerInstance(getAdvertiseAddress().toString(), lockSessionId); | ||
| } catch (Exception ex) { | ||
| log.warn("Unable to read session from tablet server lock" + ex); | ||
| log.warn("Unable to read session from tablet server lock", ex); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment here about behavior change. |
||
| return null; | ||
| } | ||
| } | ||
|
|
@@ -890,15 +890,15 @@ public boolean needsRecovery(TabletMetadata tabletMetadata) { | |
| } | ||
|
|
||
| try { | ||
| return logger.needsRecovery(getContext(), tabletMetadata.getExtent(), logEntries); | ||
| return logger.needsRecovery(tabletMetadata.getExtent(), logEntries); | ||
| } catch (IOException e) { | ||
| throw new UncheckedIOException(e); | ||
| } | ||
| } | ||
|
|
||
| public void recover(VolumeManager fs, KeyExtent extent, Collection<LogEntry> logEntries, | ||
| Set<String> tabletFiles, MutationReceiver mutationReceiver) throws IOException { | ||
| logger.recover(getContext(), extent, logEntries, tabletFiles, mutationReceiver); | ||
| public void recover(KeyExtent extent, Collection<LogEntry> logEntries, Set<String> tabletFiles, | ||
| MutationReceiver mutationReceiver) throws IOException { | ||
| logger.recover(extent, logEntries, tabletFiles, mutationReceiver); | ||
| } | ||
|
|
||
| public int createLogId() { | ||
|
|
@@ -1016,7 +1016,7 @@ private void markUnusedWALs() { | |
| try { | ||
| TServerInstance session = this.getTabletSession(); | ||
| for (DfsLogger candidate : eligible) { | ||
| log.info("Marking " + candidate.getPath() + " as unreferenced"); | ||
| log.info("Marking {} as unreferenced", candidate.getPath()); | ||
| walMarker.walUnreferenced(session, candidate.getPath()); | ||
| } | ||
| synchronized (closedLogs) { | ||
|
|
@@ -1028,7 +1028,7 @@ private void markUnusedWALs() { | |
| } | ||
|
|
||
| public void addNewLogMarker(DfsLogger copy) throws WalMarkerException { | ||
| log.info("Writing log marker for " + copy.getPath()); | ||
| log.info("Writing log marker for {}", copy.getPath()); | ||
| walMarker.addNewWalMarker(getTabletSession(), copy.getPath()); | ||
| } | ||
|
|
||
|
|
@@ -1040,7 +1040,7 @@ public void walogClosed(DfsLogger currentLog) throws WalMarkerException { | |
| closedLogs.add(currentLog); | ||
| clSize = closedLogs.size(); | ||
| } | ||
| log.info("Marking " + currentLog.getPath() + " as closed. Total closed logs " + clSize); | ||
| log.info("Marking {} as closed. Total closed logs {}", currentLog.getPath(), clSize); | ||
| walMarker.closeWal(getTabletSession(), currentLog.getPath()); | ||
|
|
||
| // whenever a new log is added to the set of closed logs, go through all of the tablets and | ||
|
|
@@ -1058,8 +1058,7 @@ public void walogClosed(DfsLogger currentLog) throws WalMarkerException { | |
| } | ||
| } | ||
| } else { | ||
| log.info( | ||
| "Marking " + currentLog.getPath() + " as unreferenced (skipping closed writes == 0)"); | ||
| log.info("Marking {} as unreferenced (skipping closed writes == 0)", currentLog.getPath()); | ||
| walMarker.walUnreferenced(getTabletSession(), currentLog.getPath()); | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -50,7 +50,6 @@ | |
| import org.apache.accumulo.core.util.Retry; | ||
| import org.apache.accumulo.core.util.Retry.RetryFactory; | ||
| import org.apache.accumulo.core.util.threads.Threads; | ||
| import org.apache.accumulo.server.ServerContext; | ||
| import org.apache.accumulo.server.log.WalStateManager.WalMarkerException; | ||
| import org.apache.accumulo.tserver.TabletMutations; | ||
| import org.apache.accumulo.tserver.TabletServer; | ||
|
|
@@ -293,7 +292,7 @@ private synchronized void startLogMaker() { | |
| try { | ||
| tserver.addNewLogMarker(alog); | ||
| } catch (Exception e) { | ||
| log.error("Failed to add new WAL marker for " + alog.getLogEntry(), e); | ||
| log.error("Failed to add new WAL marker for {}", alog.getLogEntry(), e); | ||
|
|
||
| try { | ||
| // Intentionally not deleting walog because it may have been advertised in ZK. See | ||
|
|
@@ -311,7 +310,7 @@ private synchronized void startLogMaker() { | |
| try { | ||
| tserver.walogClosed(alog); | ||
| } catch (WalMarkerException | RuntimeException e2) { | ||
| log.error("Failed to close WAL that failed to open: " + alog.getLogEntry(), e2); | ||
| log.error("Failed to close WAL that failed to open: {}", alog.getLogEntry(), e2); | ||
| } | ||
|
|
||
| try { | ||
|
|
@@ -348,7 +347,7 @@ private synchronized void close() throws IOException { | |
| } catch (DfsLogger.LogClosedException ex) { | ||
| // ignore | ||
| } catch (IOException | RuntimeException ex) { | ||
| log.error("Unable to cleanly close log " + currentLog.getLogEntry() + ": " + ex, ex); | ||
| log.error("Unable to cleanly close log {}: {}", currentLog.getLogEntry(), ex, ex); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the first
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's okay to log the entire stack trace. I think getting rid of the one that inlines it as a toString is the unnecessary one. For an error, I'd like to see the stack trace to troubleshoot. It can be very useful. |
||
| } finally { | ||
| try { | ||
| this.tserver.walogClosed(currentLog); | ||
|
|
@@ -523,26 +522,25 @@ private CacheProvider createCacheProvider(TabletServerResourceManager resourceMg | |
| LoggingBlockCache.wrap(CacheType.DATA, resourceMgr.getDataCache())); | ||
| } | ||
|
|
||
| public boolean needsRecovery(ServerContext context, KeyExtent extent, Collection<LogEntry> walogs) | ||
| throws IOException { | ||
| public boolean needsRecovery(KeyExtent extent, Collection<LogEntry> walogs) throws IOException { | ||
| try { | ||
| var resourceMgr = tserver.getResourceManager(); | ||
| var cacheProvider = createCacheProvider(resourceMgr); | ||
| SortedLogRecovery recovery = | ||
| new SortedLogRecovery(context, resourceMgr.getFileLenCache(), cacheProvider); | ||
| new SortedLogRecovery(tserver.getContext(), resourceMgr.getFileLenCache(), cacheProvider); | ||
| return recovery.needsRecovery(extent, resolve(walogs)); | ||
| } catch (Exception e) { | ||
| throw new IOException(e); | ||
| } | ||
| } | ||
|
|
||
| public void recover(ServerContext context, KeyExtent extent, Collection<LogEntry> walogs, | ||
| Set<String> tabletFiles, MutationReceiver mr) throws IOException { | ||
| public void recover(KeyExtent extent, Collection<LogEntry> walogs, Set<String> tabletFiles, | ||
| MutationReceiver mr) throws IOException { | ||
| try { | ||
| var resourceMgr = tserver.getResourceManager(); | ||
| var cacheProvider = createCacheProvider(resourceMgr); | ||
| SortedLogRecovery recovery = | ||
| new SortedLogRecovery(context, resourceMgr.getFileLenCache(), cacheProvider); | ||
| new SortedLogRecovery(tserver.getContext(), resourceMgr.getFileLenCache(), cacheProvider); | ||
| recovery.recover(extent, resolve(walogs), tabletFiles, mr); | ||
| } catch (Exception e) { | ||
| throw new IOException(e); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this prints the entire stack trace, then I think this is a behavior change. Previously this would print
Exception.toString, not the entire stack trace. Suggest adding a placeholder to the message{}.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think showing the stack trace is fine for a warning. We have no expectation that log formats are going to be the same across versions, and it's better to have a stack trace when troubleshooting than be without it.